├── _basic ├── iso │ ├── source_code │ │ ├── Pair_entropy.dat │ │ ├── readdata.mod │ │ ├── SOLUTION │ │ │ ├── readdata.mod │ │ │ ├── dcdread.h │ │ │ └── rdf.f90 │ │ ├── Makefile │ │ ├── dcdread.h │ │ └── rdf.f90 │ └── Presentations │ │ └── README.md ├── openacc │ ├── source_code │ │ ├── Pair_entropy.dat │ │ ├── readdata.mod │ │ ├── SOLUTION │ │ │ ├── readdata.mod │ │ │ ├── dcdread.h │ │ │ ├── rdf_parallel_directive.f90 │ │ │ ├── rdf_kernel_directive.f90 │ │ │ ├── rdf_collapse.f90 │ │ │ └── rdf_data_directive.f90 │ │ ├── Makefile │ │ ├── dcdread.h │ │ └── rdf.f90 │ └── Presentations │ │ └── README.md ├── openmp │ ├── source_code │ │ ├── Pair_entropy.dat │ │ ├── readdata.mod │ │ ├── SOLUTION │ │ │ ├── readdata.mod │ │ │ ├── dcdread.h │ │ │ ├── rdf_orig.f90 │ │ │ ├── rdf_offload.f90 │ │ │ ├── rdf_offload_loop.f90 │ │ │ ├── rdf_offload_collapse.f90 │ │ │ └── rdf_offload_split.f90 │ │ ├── Makefile │ │ ├── dcdread.h │ │ └── rdf.f90 │ └── Presentations │ │ └── README.md ├── _common │ ├── input │ │ └── .gitignore │ ├── images │ │ ├── UM.png │ │ ├── cpu.png │ │ ├── rdf.png │ │ ├── sol.png │ │ ├── cuda.png │ │ ├── cupy.JPG │ │ ├── f_sol.png │ │ ├── grid.png │ │ ├── matrix.png │ │ ├── memory.png │ │ ├── nvtx.PNG │ │ ├── serial.jpg │ │ ├── serial.png │ │ ├── thread.png │ │ ├── ufunc.png │ │ ├── 2d_array.png │ │ ├── cli-out.png │ │ ├── compute.png │ │ ├── dcdfile.png │ │ ├── diagram.png │ │ ├── gang_128.png │ │ ├── gang_256.png │ │ ├── gang_32.png │ │ ├── laplas3.png │ │ ├── mapping.png │ │ ├── nvtx_gpu.png │ │ ├── pair_gpu.png │ │ ├── workflow.png │ │ ├── 2d_col_mult.png │ │ ├── SOL-compute.png │ │ ├── cuda_cupy.png │ │ ├── cuda_hw_sw.png │ │ ├── cupy_arch.png │ │ ├── cupy_intro.png │ │ ├── cupy_nsys1.png │ │ ├── cupy_nsys2.png │ │ ├── cupy_nsys3.png │ │ ├── data_thread.png │ │ ├── f_gang_128.png │ │ ├── f_gang_32.png │ │ ├── gang_vector.png │ │ ├── jacobi_algo.jpg │ │ ├── matrix_grid.png │ │ ├── ngc_error.PNG │ │ ├── nsight_open.png │ │ ├── numba_nsys1.png │ │ ├── 
numba_nsys2.png │ │ ├── nvtx_serial.jpg │ │ ├── nvtx_serial.png │ │ ├── openmp_gpu.png │ │ ├── parallel1f.png │ │ ├── parallel2f.png │ │ ├── parallel3f.png │ │ ├── raw_kernel.png │ │ ├── source_loc.png │ │ ├── source_sass.png │ │ ├── stdpar_gpu.png │ │ ├── stdpar_um.png │ │ ├── coalesced_mem.png │ │ ├── collapse_pre.png │ │ ├── compute-cli-1.png │ │ ├── compute-cli-2.png │ │ ├── compute-open.png │ │ ├── compute-sets.png │ │ ├── compute_open.png │ │ ├── compute_split.png │ │ ├── cuda_indexing.png │ │ ├── cuda_profile.png │ │ ├── cuda_vec_add.png │ │ ├── cuda_vec_add2.png │ │ ├── cupy_summary.png │ │ ├── data_feedback.png │ │ ├── f_data_thread.png │ │ ├── f_gang_vector.png │ │ ├── f_memory_sec.png │ │ ├── f_openmp_gpu.png │ │ ├── f_source_loc.png │ │ ├── f_source_sass.png │ │ ├── git_branching.jpg │ │ ├── gpu_feedback.png │ │ ├── jacobi_uncore.png │ │ ├── matrix_block.png │ │ ├── numba_summary.png │ │ ├── openmp_teams.png │ │ ├── output_files.png │ │ ├── page-compute.png │ │ ├── parallel_data.jpg │ │ ├── parallel_data.png │ │ ├── parallel_loop.png │ │ ├── rule-compute.png │ │ ├── sass-compute.png │ │ ├── sol_baseline.png │ │ ├── source_hover.png │ │ ├── thread_blocks.JPG │ │ ├── thread_blocks.png │ │ ├── warp_collapse.png │ │ ├── Nsight Diagram.png │ │ ├── baseline-compute.png │ │ ├── charts-compute.png │ │ ├── collapse_thread.png │ │ ├── compute-memory.png │ │ ├── compute-memtable.png │ │ ├── compute-sections.png │ │ ├── compute_analyz.png │ │ ├── compute_collapse.png │ │ ├── compute_command.png │ │ ├── cuda_profile_api.png │ │ ├── expand-compute.png │ │ ├── f_compute_analyz.png │ │ ├── f_offload_grid.png │ │ ├── f_sol_baseline.png │ │ ├── f_source_hover.png │ │ ├── github_structure.png │ │ ├── grace_hopper_ATS.jpg │ │ ├── header-compute.png │ │ ├── jacobi_cuda_api.png │ │ ├── kernel_feedback.png │ │ ├── kokkos_ecosystem.png │ │ ├── launch-compute.png │ │ ├── memory-compute.png │ │ ├── numba_summary1.png │ │ ├── nvtx_multicore.jpg │ │ ├── nvtx_multicore.png │ │ ├── 
openacc_parallel.png │ │ ├── openmp_feedback.png │ │ ├── openmp_fork_join.png │ │ ├── openmp_multicore.png │ │ ├── openmp_teams_for.png │ │ ├── openmp_warp_cmp.png │ │ ├── parallel_expand.jpg │ │ ├── parallel_expand.png │ │ ├── parallel_unified.jpg │ │ ├── parallel_unified.png │ │ ├── rapids_package.png │ │ ├── roofline-compute.png │ │ ├── sections-compute.png │ │ ├── serial_cpu_rdf1.png │ │ ├── serial_cpu_rdf2.png │ │ ├── serial_output1.png │ │ ├── serial_profile.png │ │ ├── serial_profiler1.png │ │ ├── source-compute.png │ │ ├── source_collapse.png │ │ ├── stdpar_multicore.png │ │ ├── summary-compute.png │ │ ├── thread_position.png │ │ ├── uncoalesced_hint.png │ │ ├── unified_memory.png │ │ ├── warning-compute.png │ │ ├── Optimization_Cycle.jpg │ │ ├── allsection-compute.png │ │ ├── baseline1-compute.png │ │ ├── collapse_feedback.png │ │ ├── cupy_kernel_memory.png │ │ ├── do_concurrent_gpu.jpg │ │ ├── f_collapse_thread.png │ │ ├── f_compute_command.png │ │ ├── f_memory_collapse.png │ │ ├── f_openmp_multicore.png │ │ ├── f_openmp_warp_cmp.png │ │ ├── grace_hopper_arch.jpg │ │ ├── kokkos_abstraction.png │ │ ├── kokkos_mirror_view.png │ │ ├── numba_output_files.png │ │ ├── openacc_construct.jpg │ │ ├── openacc_construct.png │ │ ├── openacc_copyclause.png │ │ ├── openacc_parallel2.png │ │ ├── pair_gpu_analysis.png │ │ ├── parallel_detailed.png │ │ ├── parallel_timeline.jpg │ │ ├── parallel_timeline.png │ │ ├── roofline-achieved.png │ │ ├── roofline-analysis.png │ │ ├── roofline-baseline.png │ │ ├── roofline-overview.png │ │ ├── roofline_collapse.png │ │ ├── scheduler_collapse.png │ │ ├── serial_output_file.png │ │ ├── C2C_Profiler_Output.jpg │ │ ├── compute_command_line.png │ │ ├── cuda_profile_timeline.jpg │ │ ├── cuda_profile_timeline.png │ │ ├── f_collapse_feedback.png │ │ ├── f_openmp_collapse_reg.png │ │ ├── f_openmp_gpu_collapse.png │ │ ├── f_roofline_collapse.png │ │ ├── grace_hopper_numactl.jpg │ │ ├── kernel_indep_feedback.png │ │ ├── memory_architecture.png 
│ │ ├── nsys-compute-command.png │ │ ├── nsys-compute-command1.png │ │ ├── nsys-compute-command2.png │ │ ├── nvtx_multicore (copy).png │ │ ├── openacc correlation.jpg │ │ ├── openacc correlation.png │ │ ├── openacc_3_directives.png │ │ ├── openacc_parallel_loop.png │ │ ├── openmp_collapse_reg.png │ │ ├── openmp_gpu_collapse.png │ │ ├── openmp_target_teams.png │ │ ├── serial_cupy_profile.png │ │ ├── serial_numba_profile.png │ │ ├── source_sass_collapse.png │ │ ├── do_concurrent_multicore.jpg │ │ ├── f_offload_compare_nvtx.png │ │ ├── gpu_programming_process.png │ │ ├── heterogeneous_computing.jpg │ │ ├── jacobi_solution_uncore.png │ │ ├── openmp_offload_collapse.png │ │ ├── openmp_offload_roofline.png │ │ ├── parallel_data_feedback.png │ │ ├── compute_collapse_roofline.png │ │ ├── f_openacc_data_directive.png │ │ ├── f_openmp_collapse_baseline.png │ │ ├── f_openmp_offload_collapse.png │ │ ├── f_openmp_offload_occupancy.png │ │ ├── f_openmp_offload_roofline.png │ │ ├── f_openmp_offload_split_cmp.png │ │ ├── grace_hopper_global_access.jpg │ │ ├── jacobi_solution_cuda_api.png │ │ ├── openacc_multicore_feedback.png │ │ ├── openmp_collapse_baseline.png │ │ ├── openmp_collapse_reg_memory.png │ │ ├── openmp_feedback_collapse.png │ │ ├── openmp_feedback_multicore.png │ │ ├── openmp_offload_occupancy.png │ │ ├── openmp_offload_split_cmp.png │ │ ├── openmp_offload_split_cmp2.png │ │ ├── openmp_offload_split_grid.png │ │ ├── openmp_parallel_construct.png │ │ ├── openmp_target_distribute.png │ │ ├── f_openmp_collapse_reg_memory.png │ │ ├── f_openmp_offload_split_cmp2.png │ │ ├── f_openmp_offload_split_grid.png │ │ ├── grace_hopper_page_migration.jpg │ │ ├── openmp_collapse_reg_roofline.png │ │ ├── openmp_parallelfor_construct.png │ │ ├── f_openmp_collapse_reg_occupancy.png │ │ ├── f_openmp_collapse_reg_roofline.png │ │ ├── f_openmp_feedback_offload_split.png │ │ ├── openmp_collapse_reg_occupancy.png │ │ └── openmp_feedback_offload_split.png │ ├── dataset.py │ ├── 
dataset_python.py │ └── source_code │ │ ├── Makefile │ │ ├── dcdread.h │ │ └── rdf.f90 ├── python │ ├── source_code │ │ └── input │ │ │ └── .gitignore │ └── Presentations │ │ └── README.md ├── memory_coherent │ └── source_code │ │ ├── jacobi_report.nsys-rep │ │ ├── jacobi_report.sqlite │ │ ├── unified_test.cu │ │ └── Makefile ├── cuda │ ├── Presentations │ │ └── README.md │ └── source_code │ │ ├── dcdread.h │ │ ├── SOLUTION │ │ └── dcdread.h │ │ ├── vector_addition_gpu_block_only.cu │ │ ├── vector_addition_gpu_thread_only.cu │ │ └── vector_addition_gpu_thread_block.cu ├── kokkos │ ├── Presentations │ │ └── README.md │ └── source_code │ │ └── dcdread.h ├── LICENSE └── _start_nways.ipynb ├── .gitignore ├── _scripts ├── nways_Dockerfile ├── cuda_Singularity ├── nways_Singularity ├── nways_Dockerfile_python └── nways_Singularity_python ├── nways_Dockerfile ├── Repo_Structure.md ├── nways_Singularity ├── README.md ├── nways_Dockerfile_python ├── nways_Singularity_python └── Deployment_Guide.md /_basic/iso/source_code/Pair_entropy.dat: -------------------------------------------------------------------------------- 1 | s2 value is -2.43191 2 | s2bond value is -3.87014 3 | -------------------------------------------------------------------------------- /_basic/openacc/source_code/Pair_entropy.dat: -------------------------------------------------------------------------------- 1 | s2 value is -2.43191 2 | s2bond value is -3.87014 3 | -------------------------------------------------------------------------------- /_basic/openmp/source_code/Pair_entropy.dat: -------------------------------------------------------------------------------- 1 | s2 value is -2.43191 2 | s2bond value is -3.87014 3 | -------------------------------------------------------------------------------- /_basic/_common/input/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 
-------------------------------------------------------------------------------- /_basic/python/source_code/input/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore -------------------------------------------------------------------------------- /_basic/_common/images/UM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/UM.png -------------------------------------------------------------------------------- /_basic/_common/images/cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cpu.png -------------------------------------------------------------------------------- /_basic/_common/images/rdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/rdf.png -------------------------------------------------------------------------------- /_basic/_common/images/sol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/sol.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy.JPG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy.JPG -------------------------------------------------------------------------------- /_basic/_common/images/f_sol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_sol.png -------------------------------------------------------------------------------- /_basic/_common/images/grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/grid.png -------------------------------------------------------------------------------- /_basic/_common/images/matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/matrix.png -------------------------------------------------------------------------------- /_basic/_common/images/memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/memory.png -------------------------------------------------------------------------------- /_basic/_common/images/nvtx.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx.PNG -------------------------------------------------------------------------------- /_basic/_common/images/serial.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial.jpg -------------------------------------------------------------------------------- /_basic/_common/images/serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial.png -------------------------------------------------------------------------------- /_basic/_common/images/thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/thread.png -------------------------------------------------------------------------------- /_basic/_common/images/ufunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/ufunc.png -------------------------------------------------------------------------------- /_basic/_common/images/2d_array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/2d_array.png -------------------------------------------------------------------------------- /_basic/_common/images/cli-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cli-out.png -------------------------------------------------------------------------------- /_basic/_common/images/compute.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute.png -------------------------------------------------------------------------------- /_basic/_common/images/dcdfile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/dcdfile.png -------------------------------------------------------------------------------- /_basic/_common/images/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/diagram.png -------------------------------------------------------------------------------- /_basic/_common/images/gang_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/gang_128.png -------------------------------------------------------------------------------- /_basic/_common/images/gang_256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/gang_256.png -------------------------------------------------------------------------------- /_basic/_common/images/gang_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/gang_32.png -------------------------------------------------------------------------------- /_basic/_common/images/laplas3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/laplas3.png -------------------------------------------------------------------------------- /_basic/_common/images/mapping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/mapping.png -------------------------------------------------------------------------------- /_basic/_common/images/nvtx_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx_gpu.png -------------------------------------------------------------------------------- /_basic/_common/images/pair_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/pair_gpu.png -------------------------------------------------------------------------------- /_basic/_common/images/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/workflow.png -------------------------------------------------------------------------------- /_basic/_common/images/2d_col_mult.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/2d_col_mult.png -------------------------------------------------------------------------------- /_basic/_common/images/SOL-compute.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/SOL-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_cupy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_cupy.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_hw_sw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_hw_sw.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_arch.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_intro.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_nsys1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_nsys1.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_nsys2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_nsys2.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_nsys3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_nsys3.png -------------------------------------------------------------------------------- /_basic/_common/images/data_thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/data_thread.png -------------------------------------------------------------------------------- /_basic/_common/images/f_gang_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_gang_128.png -------------------------------------------------------------------------------- /_basic/_common/images/f_gang_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_gang_32.png -------------------------------------------------------------------------------- /_basic/_common/images/gang_vector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/gang_vector.png -------------------------------------------------------------------------------- /_basic/_common/images/jacobi_algo.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/jacobi_algo.jpg -------------------------------------------------------------------------------- /_basic/_common/images/matrix_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/matrix_grid.png -------------------------------------------------------------------------------- /_basic/_common/images/ngc_error.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/ngc_error.PNG -------------------------------------------------------------------------------- /_basic/_common/images/nsight_open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nsight_open.png -------------------------------------------------------------------------------- /_basic/_common/images/numba_nsys1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/numba_nsys1.png -------------------------------------------------------------------------------- /_basic/_common/images/numba_nsys2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/numba_nsys2.png -------------------------------------------------------------------------------- /_basic/_common/images/nvtx_serial.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx_serial.jpg -------------------------------------------------------------------------------- /_basic/_common/images/nvtx_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx_serial.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_gpu.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel1f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel1f.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel2f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel2f.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel3f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel3f.png -------------------------------------------------------------------------------- /_basic/_common/images/raw_kernel.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/raw_kernel.png -------------------------------------------------------------------------------- /_basic/_common/images/source_loc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/source_loc.png -------------------------------------------------------------------------------- /_basic/_common/images/source_sass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/source_sass.png -------------------------------------------------------------------------------- /_basic/_common/images/stdpar_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/stdpar_gpu.png -------------------------------------------------------------------------------- /_basic/_common/images/stdpar_um.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/stdpar_um.png -------------------------------------------------------------------------------- /_basic/iso/source_code/readdata.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/iso/source_code/readdata.mod -------------------------------------------------------------------------------- /_basic/_common/images/coalesced_mem.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/coalesced_mem.png -------------------------------------------------------------------------------- /_basic/_common/images/collapse_pre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/collapse_pre.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-cli-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-cli-1.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-cli-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-cli-2.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-open.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-sets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-sets.png -------------------------------------------------------------------------------- /_basic/_common/images/compute_open.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_open.png -------------------------------------------------------------------------------- /_basic/_common/images/compute_split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_split.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_indexing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_indexing.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_profile.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_vec_add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_vec_add.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_vec_add2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_vec_add2.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_summary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_summary.png -------------------------------------------------------------------------------- /_basic/_common/images/data_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/data_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/f_data_thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_data_thread.png -------------------------------------------------------------------------------- /_basic/_common/images/f_gang_vector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_gang_vector.png -------------------------------------------------------------------------------- /_basic/_common/images/f_memory_sec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_memory_sec.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_gpu.png -------------------------------------------------------------------------------- /_basic/_common/images/f_source_loc.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_source_loc.png -------------------------------------------------------------------------------- /_basic/_common/images/f_source_sass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_source_sass.png -------------------------------------------------------------------------------- /_basic/_common/images/git_branching.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/git_branching.jpg -------------------------------------------------------------------------------- /_basic/_common/images/gpu_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/gpu_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/jacobi_uncore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/jacobi_uncore.png -------------------------------------------------------------------------------- /_basic/_common/images/matrix_block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/matrix_block.png -------------------------------------------------------------------------------- /_basic/_common/images/numba_summary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/numba_summary.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_teams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_teams.png -------------------------------------------------------------------------------- /_basic/_common/images/output_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/output_files.png -------------------------------------------------------------------------------- /_basic/_common/images/page-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/page-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel_data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_data.jpg -------------------------------------------------------------------------------- /_basic/_common/images/parallel_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_data.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel_loop.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_loop.png -------------------------------------------------------------------------------- /_basic/_common/images/rule-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/rule-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/sass-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/sass-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/sol_baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/sol_baseline.png -------------------------------------------------------------------------------- /_basic/_common/images/source_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/source_hover.png -------------------------------------------------------------------------------- /_basic/_common/images/thread_blocks.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/thread_blocks.JPG -------------------------------------------------------------------------------- /_basic/_common/images/thread_blocks.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/thread_blocks.png -------------------------------------------------------------------------------- /_basic/_common/images/warp_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/warp_collapse.png -------------------------------------------------------------------------------- /_basic/openacc/source_code/readdata.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/openacc/source_code/readdata.mod -------------------------------------------------------------------------------- /_basic/openmp/source_code/readdata.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/openmp/source_code/readdata.mod -------------------------------------------------------------------------------- /_basic/_common/images/Nsight Diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/Nsight Diagram.png -------------------------------------------------------------------------------- /_basic/_common/images/baseline-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/baseline-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/charts-compute.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/charts-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/collapse_thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/collapse_thread.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-memory.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-memtable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-memtable.png -------------------------------------------------------------------------------- /_basic/_common/images/compute-sections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute-sections.png -------------------------------------------------------------------------------- /_basic/_common/images/compute_analyz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_analyz.png -------------------------------------------------------------------------------- /_basic/_common/images/compute_collapse.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/compute_command.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_command.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_profile_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_profile_api.png -------------------------------------------------------------------------------- /_basic/_common/images/expand-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/expand-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/f_compute_analyz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_compute_analyz.png -------------------------------------------------------------------------------- /_basic/_common/images/f_offload_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_offload_grid.png -------------------------------------------------------------------------------- 
/_basic/_common/images/f_sol_baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_sol_baseline.png -------------------------------------------------------------------------------- /_basic/_common/images/f_source_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_source_hover.png -------------------------------------------------------------------------------- /_basic/_common/images/github_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/github_structure.png -------------------------------------------------------------------------------- /_basic/_common/images/grace_hopper_ATS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/grace_hopper_ATS.jpg -------------------------------------------------------------------------------- /_basic/_common/images/header-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/header-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/jacobi_cuda_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/jacobi_cuda_api.png 
-------------------------------------------------------------------------------- /_basic/_common/images/kernel_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/kernel_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/kokkos_ecosystem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/kokkos_ecosystem.png -------------------------------------------------------------------------------- /_basic/_common/images/launch-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/launch-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/memory-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/memory-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/numba_summary1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/numba_summary1.png -------------------------------------------------------------------------------- /_basic/_common/images/nvtx_multicore.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx_multicore.jpg -------------------------------------------------------------------------------- /_basic/_common/images/nvtx_multicore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx_multicore.png -------------------------------------------------------------------------------- /_basic/_common/images/openacc_parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_parallel.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_fork_join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_fork_join.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_multicore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_multicore.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_teams_for.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_teams_for.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_warp_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_warp_cmp.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel_expand.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_expand.jpg -------------------------------------------------------------------------------- /_basic/_common/images/parallel_expand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_expand.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel_unified.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_unified.jpg -------------------------------------------------------------------------------- /_basic/_common/images/parallel_unified.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_unified.png -------------------------------------------------------------------------------- 
/_basic/_common/images/rapids_package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/rapids_package.png -------------------------------------------------------------------------------- /_basic/_common/images/roofline-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/roofline-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/sections-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/sections-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/serial_cpu_rdf1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_cpu_rdf1.png -------------------------------------------------------------------------------- /_basic/_common/images/serial_cpu_rdf2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_cpu_rdf2.png -------------------------------------------------------------------------------- /_basic/_common/images/serial_output1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_output1.png 
-------------------------------------------------------------------------------- /_basic/_common/images/serial_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_profile.png -------------------------------------------------------------------------------- /_basic/_common/images/serial_profiler1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_profiler1.png -------------------------------------------------------------------------------- /_basic/_common/images/source-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/source-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/source_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/source_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/stdpar_multicore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/stdpar_multicore.png -------------------------------------------------------------------------------- /_basic/_common/images/summary-compute.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/summary-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/thread_position.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/thread_position.png -------------------------------------------------------------------------------- /_basic/_common/images/uncoalesced_hint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/uncoalesced_hint.png -------------------------------------------------------------------------------- /_basic/_common/images/unified_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/unified_memory.png -------------------------------------------------------------------------------- /_basic/_common/images/warning-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/warning-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/Optimization_Cycle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/Optimization_Cycle.jpg -------------------------------------------------------------------------------- /_basic/_common/images/allsection-compute.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/allsection-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/baseline1-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/baseline1-compute.png -------------------------------------------------------------------------------- /_basic/_common/images/collapse_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/collapse_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/cupy_kernel_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cupy_kernel_memory.png -------------------------------------------------------------------------------- /_basic/_common/images/do_concurrent_gpu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/do_concurrent_gpu.jpg -------------------------------------------------------------------------------- /_basic/_common/images/f_collapse_thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_collapse_thread.png -------------------------------------------------------------------------------- 
/_basic/_common/images/f_compute_command.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_compute_command.png -------------------------------------------------------------------------------- /_basic/_common/images/f_memory_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_memory_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_multicore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_multicore.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_warp_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_warp_cmp.png -------------------------------------------------------------------------------- /_basic/_common/images/grace_hopper_arch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/grace_hopper_arch.jpg -------------------------------------------------------------------------------- /_basic/_common/images/kokkos_abstraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/kokkos_abstraction.png 
-------------------------------------------------------------------------------- /_basic/_common/images/kokkos_mirror_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/kokkos_mirror_view.png -------------------------------------------------------------------------------- /_basic/_common/images/numba_output_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/numba_output_files.png -------------------------------------------------------------------------------- /_basic/_common/images/openacc_construct.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_construct.jpg -------------------------------------------------------------------------------- /_basic/_common/images/openacc_construct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_construct.png -------------------------------------------------------------------------------- /_basic/_common/images/openacc_copyclause.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_copyclause.png -------------------------------------------------------------------------------- /_basic/_common/images/openacc_parallel2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_parallel2.png -------------------------------------------------------------------------------- /_basic/_common/images/pair_gpu_analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/pair_gpu_analysis.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel_detailed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_detailed.png -------------------------------------------------------------------------------- /_basic/_common/images/parallel_timeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_timeline.jpg -------------------------------------------------------------------------------- /_basic/_common/images/parallel_timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_timeline.png -------------------------------------------------------------------------------- /_basic/_common/images/roofline-achieved.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/roofline-achieved.png -------------------------------------------------------------------------------- /_basic/_common/images/roofline-analysis.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/roofline-analysis.png -------------------------------------------------------------------------------- /_basic/_common/images/roofline-baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/roofline-baseline.png -------------------------------------------------------------------------------- /_basic/_common/images/roofline-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/roofline-overview.png -------------------------------------------------------------------------------- /_basic/_common/images/roofline_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/roofline_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/scheduler_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/scheduler_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/serial_output_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_output_file.png -------------------------------------------------------------------------------- 
/_basic/iso/source_code/SOLUTION/readdata.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/iso/source_code/SOLUTION/readdata.mod -------------------------------------------------------------------------------- /_basic/_common/images/C2C_Profiler_Output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/C2C_Profiler_Output.jpg -------------------------------------------------------------------------------- /_basic/_common/images/compute_command_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_command_line.png -------------------------------------------------------------------------------- /_basic/_common/images/cuda_profile_timeline.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_profile_timeline.jpg -------------------------------------------------------------------------------- /_basic/_common/images/cuda_profile_timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/cuda_profile_timeline.png -------------------------------------------------------------------------------- /_basic/_common/images/f_collapse_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_collapse_feedback.png 
-------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_collapse_reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_collapse_reg.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_gpu_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_gpu_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/f_roofline_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_roofline_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/grace_hopper_numactl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/grace_hopper_numactl.jpg -------------------------------------------------------------------------------- /_basic/_common/images/kernel_indep_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/kernel_indep_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/memory_architecture.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/memory_architecture.png -------------------------------------------------------------------------------- /_basic/_common/images/nsys-compute-command.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nsys-compute-command.png -------------------------------------------------------------------------------- /_basic/_common/images/nsys-compute-command1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nsys-compute-command1.png -------------------------------------------------------------------------------- /_basic/_common/images/nsys-compute-command2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nsys-compute-command2.png -------------------------------------------------------------------------------- /_basic/_common/images/nvtx_multicore (copy).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/nvtx_multicore (copy).png -------------------------------------------------------------------------------- /_basic/_common/images/openacc correlation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc correlation.jpg -------------------------------------------------------------------------------- /_basic/_common/images/openacc correlation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc correlation.png -------------------------------------------------------------------------------- /_basic/_common/images/openacc_3_directives.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_3_directives.png -------------------------------------------------------------------------------- /_basic/_common/images/openacc_parallel_loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_parallel_loop.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_collapse_reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_collapse_reg.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_gpu_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_gpu_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_target_teams.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_target_teams.png 
-------------------------------------------------------------------------------- /_basic/_common/images/serial_cupy_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_cupy_profile.png -------------------------------------------------------------------------------- /_basic/_common/images/serial_numba_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/serial_numba_profile.png -------------------------------------------------------------------------------- /_basic/_common/images/source_sass_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/source_sass_collapse.png -------------------------------------------------------------------------------- /_basic/openmp/source_code/SOLUTION/readdata.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/openmp/source_code/SOLUTION/readdata.mod -------------------------------------------------------------------------------- /_basic/_common/images/do_concurrent_multicore.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/do_concurrent_multicore.jpg -------------------------------------------------------------------------------- /_basic/_common/images/f_offload_compare_nvtx.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_offload_compare_nvtx.png -------------------------------------------------------------------------------- /_basic/_common/images/gpu_programming_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/gpu_programming_process.png -------------------------------------------------------------------------------- /_basic/_common/images/heterogeneous_computing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/heterogeneous_computing.jpg -------------------------------------------------------------------------------- /_basic/_common/images/jacobi_solution_uncore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/jacobi_solution_uncore.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_offload_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_offload_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_offload_roofline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_offload_roofline.png -------------------------------------------------------------------------------- 
/_basic/_common/images/parallel_data_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/parallel_data_feedback.png -------------------------------------------------------------------------------- /_basic/openacc/source_code/SOLUTION/readdata.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/openacc/source_code/SOLUTION/readdata.mod -------------------------------------------------------------------------------- /_basic/_common/images/compute_collapse_roofline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/compute_collapse_roofline.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openacc_data_directive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openacc_data_directive.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_collapse_baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_collapse_baseline.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_offload_collapse.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_offload_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_offload_occupancy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_offload_occupancy.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_offload_roofline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_offload_roofline.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_offload_split_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_offload_split_cmp.png -------------------------------------------------------------------------------- /_basic/_common/images/grace_hopper_global_access.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/grace_hopper_global_access.jpg -------------------------------------------------------------------------------- /_basic/_common/images/jacobi_solution_cuda_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/jacobi_solution_cuda_api.png 
-------------------------------------------------------------------------------- /_basic/_common/images/openacc_multicore_feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openacc_multicore_feedback.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_collapse_baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_collapse_baseline.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_collapse_reg_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_collapse_reg_memory.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_feedback_collapse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_feedback_collapse.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_feedback_multicore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_feedback_multicore.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_offload_occupancy.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_offload_occupancy.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_offload_split_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_offload_split_cmp.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_offload_split_cmp2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_offload_split_cmp2.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_offload_split_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_offload_split_grid.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_parallel_construct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_parallel_construct.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_target_distribute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_target_distribute.png -------------------------------------------------------------------------------- 
/_basic/_common/images/f_openmp_collapse_reg_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_collapse_reg_memory.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_offload_split_cmp2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_offload_split_cmp2.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_offload_split_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_offload_split_grid.png -------------------------------------------------------------------------------- /_basic/_common/images/grace_hopper_page_migration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/grace_hopper_page_migration.jpg -------------------------------------------------------------------------------- /_basic/_common/images/openmp_collapse_reg_roofline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_collapse_reg_roofline.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_parallelfor_construct.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_parallelfor_construct.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_collapse_reg_occupancy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_collapse_reg_occupancy.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_collapse_reg_roofline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_collapse_reg_roofline.png -------------------------------------------------------------------------------- /_basic/_common/images/f_openmp_feedback_offload_split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/f_openmp_feedback_offload_split.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_collapse_reg_occupancy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_collapse_reg_occupancy.png -------------------------------------------------------------------------------- /_basic/_common/images/openmp_feedback_offload_split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/_common/images/openmp_feedback_offload_split.png 
-------------------------------------------------------------------------------- /_basic/memory_coherent/source_code/jacobi_report.nsys-rep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/memory_coherent/source_code/jacobi_report.nsys-rep -------------------------------------------------------------------------------- /_basic/memory_coherent/source_code/jacobi_report.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openhackathons-org/nways_accelerated_programming/main/_basic/memory_coherent/source_code/jacobi_report.sqlite -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | */.ipynb_checkpoints/* 3 | alk.traj.dcd 4 | *.simg 5 | *.sif 6 | *.so* 7 | *.a 8 | *.la 9 | mgpm 10 | *.o 11 | *.out 12 | */.ses/* 13 | */.log/* 14 | */not repo/* 15 | */.nsys-rep/* 16 | */.sqlite/* 17 | */.ncu-rep/* 18 | not repo/ 19 | _test/ 20 | _advanced/ 21 | README_.md 22 | 23 | -------------------------------------------------------------------------------- /_basic/_common/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
2 | 3 | import gdown 4 | import os 5 | 6 | ## alk.traj.dcd input file 7 | #url = 'https://drive.google.com/uc?id=1WZ0rtXZ-uMLfy7htT0gaU4EQ_Rq61QTF&export=download' 8 | url = 'https://drive.google.com/u/0/uc?export=download&confirm=jDXw&id=1WZ0rtXZ-uMLfy7htT0gaU4EQ_Rq61QTF' 9 | output_ = '/labs/_common/input/alk.traj.dcd' 10 | gdown.download(url, output_, quiet=False,proxy=None) 11 | -------------------------------------------------------------------------------- /_basic/_common/dataset_python.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 2 | 3 | import gdown 4 | import os 5 | 6 | ## alk.traj.dcd input file 7 | #url = 'https://drive.google.com/uc?id=1WZ0rtXZ-uMLfy7htT0gaU4EQ_Rq61QTF&export=download' 8 | url = 'https://drive.google.com/u/0/uc?export=download&confirm=jDXw&id=1WZ0rtXZ-uMLfy7htT0gaU4EQ_Rq61QTF' 9 | output = '/labs/python/source_code/input/alk.traj.dcd' 10 | gdown.download(url, output, quiet=False, proxy=None) 11 | -------------------------------------------------------------------------------- /_basic/memory_coherent/source_code/unified_test.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int d; 6 | cudaGetDevice(&d); 7 | 8 | int pma = 0; 9 | cudaDeviceGetAttribute(&pma, cudaDevAttrPageableMemoryAccess, d); 10 | printf("Full Unified Memory Support: %s\n", pma == 1? "YES" : "NO"); 11 | 12 | int cma = 0; 13 | cudaDeviceGetAttribute(&cma, cudaDevAttrConcurrentManagedAccess, d); 14 | printf("CUDA Managed Memory with full support: %s\n", cma == 1? "YES" : "NO"); 15 | 16 | return 0; 17 | } -------------------------------------------------------------------------------- /_basic/openmp/source_code/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 
2 | 3 | FC := nvfortran 4 | CC := nvc++ 5 | 6 | FLAGS := -O3 -w 7 | ACCFLAGS := -mp=multicore -Minfo=mp 8 | 9 | NVTXLIB_F := -lnvhpcwrapnvtx 10 | 11 | VER=$(shell nvc -dumpversion) 12 | NVARCH=$(shell uname -s)_$(shell uname -m) 13 | NVTXLIB_C := -I/opt/nvidia/hpc_sdk/$(NVARCH)/$(VER)/cuda/include 14 | 15 | rdf_f: rdf.f90 16 | ${FC} ${FLAGS} ${ACCFLAGS} -o rdf_f rdf.f90 ${NVTXLIB_F} 17 | 18 | rdf_c: rdf.cpp 19 | ${CC} ${FLAGS} ${ACCFLAGS} -o rdf_c rdf.cpp ${NVTXLIB_C} 20 | 21 | clean: 22 | rm -f *.o rdf_f rdf_c 23 | -------------------------------------------------------------------------------- /_basic/openacc/source_code/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | 3 | FC := nvfortran 4 | CC := nvc++ 5 | 6 | FLAGS := -O3 -w 7 | ACCFLAGS := -acc=multicore -Minfo=accel 8 | 9 | NVTXLIB_F := -lnvhpcwrapnvtx 10 | 11 | VER=$(shell nvc -dumpversion) 12 | NVARCH=$(shell uname -s)_$(shell uname -m) 13 | NVTXLIB_C := -I/opt/nvidia/hpc_sdk/$(NVARCH)/$(VER)/cuda/include 14 | 15 | rdf_f: rdf.f90 16 | ${FC} ${FLAGS} ${ACCFLAGS} -o rdf_f rdf.f90 ${NVTXLIB_F} 17 | 18 | rdf_c: rdf.cpp 19 | ${CC} ${FLAGS} ${ACCFLAGS} -o rdf_c rdf.cpp ${NVTXLIB_C} 20 | 21 | clean: 22 | rm -f *.o rdf_f rdf_c 23 | -------------------------------------------------------------------------------- /_basic/_common/source_code/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 
2 | 3 | CC := nvc++ 4 | CFLAGS := -O3 -w -ldl 5 | ACCFLAGS := -Minfo=accel 6 | VER=$(shell nvc -dumpversion) 7 | NVARCH=$(shell uname -s)_$(shell uname -m) 8 | NVTXLIB_c := -I/opt/nvidia/hpc_sdk/$(NVARCH)/$(VER)/cuda/include 9 | 10 | FC := nvfortran 11 | FLAGS := -O3 -w 12 | NVTXLIB_f := -lnvhpcwrapnvtx 13 | 14 | rdf_f:rdf.f90 15 | ${FC} ${FLAGS} ${ACCFLAGS} rdf.f90 -o rdf_f ${NVTXLIB_f} 16 | 17 | 18 | rdf_c: rdf.cpp 19 | ${CC} ${CFLAGS} ${ACCFLAGS} -o rdf_c rdf.cpp ${NVTXLIB_c} 20 | 21 | clean: 22 | rm -f *.o rdf_c rdf_f -------------------------------------------------------------------------------- /_basic/iso/source_code/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | 3 | FC := nvfortran 4 | CC := nvc++ 5 | 6 | FLAGS := -O3 -w 7 | ACCFLAGS := -stdpar=multicore -Minfo 8 | 9 | NVTXLIB_F := -lnvhpcwrapnvtx 10 | 11 | VER=$(shell nvc -dumpversion) 12 | NVARCH=$(shell uname -s)_$(shell uname -m) 13 | NVTXLIB_C := -I/opt/nvidia/hpc_sdk/$(NVARCH)/$(VER)/cuda/include 14 | 15 | rdf_f: rdf.f90 16 | ${FC} ${FLAGS} ${ACCFLAGS} -o rdf_f rdf.f90 ${NVTXLIB_F} 17 | 18 | rdf_c: rdf.cpp 19 | ${CC} ${FLAGS} -std=c++20 ${ACCFLAGS} -o rdf_c rdf.cpp -fopenmp ${NVTXLIB_C} 20 | 21 | clean: 22 | rm -f *.o rdf_f rdf_c 23 | -------------------------------------------------------------------------------- /_basic/cuda/Presentations/README.md: -------------------------------------------------------------------------------- 1 | For Partners who are interested in delivering the critical hands-on skills needed to advance science in form of Bootcamp can reach out to us at [Open Hackathons Partner](https://www.openhackathons.org/s/about-open-hackathons) website. 
In addition to the current bootcamp material, Partners will be provided with the following: 2 | 3 | - Presentation: All the Bootcamps are accompanied by training material presentations which can be used during the Bootcamp session. 4 | - Mini challenge: To test the knowledge gained during this Bootcamp, a mini application challenge is provided along with a sample solution. 5 | - Additional Support: On a case-by-case basis, Partners can also be trained on how to effectively deliver the Bootcamp with maximal impact. -------------------------------------------------------------------------------- /_basic/iso/Presentations/README.md: -------------------------------------------------------------------------------- 1 | Partners who are interested in delivering the critical hands-on skills needed to advance science in the form of a Bootcamp can reach out to us at the [Open Hackathons Partner](https://www.openhackathons.org/s/about-open-hackathons) website. In addition to the current bootcamp material, Partners will be provided with the following: 2 | 3 | - Presentation: All the Bootcamps are accompanied by training material presentations which can be used during the Bootcamp session. 4 | - Mini challenge: To test the knowledge gained during this Bootcamp, a mini application challenge is provided along with a sample solution. 5 | - Additional Support: On a case-by-case basis, Partners can also be trained on how to effectively deliver the Bootcamp with maximal impact. -------------------------------------------------------------------------------- /_basic/kokkos/Presentations/README.md: -------------------------------------------------------------------------------- 1 | Partners who are interested in delivering the critical hands-on skills needed to advance science in the form of a Bootcamp can reach out to us at the [Open Hackathons Partner](https://www.openhackathons.org/s/about-open-hackathons) website.
In addition to the current bootcamp material, Partners will be provided with the following: 2 | 3 | - Presentation: All the Bootcamps are accompanied by training material presentations which can be used during the Bootcamp session. 4 | - Mini challenge: To test the knowledge gained during this Bootcamp, a mini application challenge is provided along with a sample solution. 5 | - Additional Support: On a case-by-case basis, Partners can also be trained on how to effectively deliver the Bootcamp with maximal impact. -------------------------------------------------------------------------------- /_basic/openacc/Presentations/README.md: -------------------------------------------------------------------------------- 1 | Partners who are interested in delivering the critical hands-on skills needed to advance science in the form of a Bootcamp can reach out to us at the [Open Hackathons Partner](https://www.openhackathons.org/s/about-open-hackathons) website. In addition to the current bootcamp material, Partners will be provided with the following: 2 | 3 | - Presentation: All the Bootcamps are accompanied by training material presentations which can be used during the Bootcamp session. 4 | - Mini challenge: To test the knowledge gained during this Bootcamp, a mini application challenge is provided along with a sample solution. 5 | - Additional Support: On a case-by-case basis, Partners can also be trained on how to effectively deliver the Bootcamp with maximal impact. -------------------------------------------------------------------------------- /_basic/openmp/Presentations/README.md: -------------------------------------------------------------------------------- 1 | Partners who are interested in delivering the critical hands-on skills needed to advance science in the form of a Bootcamp can reach out to us at the [Open Hackathons Partner](https://www.openhackathons.org/s/about-open-hackathons) website.
In addition to the current bootcamp material, Partners will be provided with the following: 2 | 3 | - Presentation: All the Bootcamps are accompanied by training material presentations which can be used during the Bootcamp session. 4 | - Mini challenge: To test the knowledge gained during this Bootcamp, a mini application challenge is provided along with a sample solution. 5 | - Additional Support: On a case-by-case basis, Partners can also be trained on how to effectively deliver the Bootcamp with maximal impact. -------------------------------------------------------------------------------- /_basic/python/Presentations/README.md: -------------------------------------------------------------------------------- 1 | Partners who are interested in delivering the critical hands-on skills needed to advance science in the form of a Bootcamp can reach out to us at the [Open Hackathons Partner](https://www.openhackathons.org/s/about-open-hackathons) website. In addition to the current bootcamp material, Partners will be provided with the following: 2 | 3 | - Presentation: All the Bootcamps are accompanied by training material presentations which can be used during the Bootcamp session. 4 | - Mini challenge: To test the knowledge gained during this Bootcamp, a mini application challenge is provided along with a sample solution. 5 | - Additional Support: On a case-by-case basis, Partners can also be trained on how to effectively deliver the Bootcamp with maximal impact. -------------------------------------------------------------------------------- /_basic/memory_coherent/source_code/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
2 | NVCC=nvcc 3 | #CUDA_HOME=hpc_sdk_path/Linux_x86_64/21.3/cuda/11.2/ 4 | GENCODE_SM60 := -gencode arch=compute_60,code=sm_60 5 | GENCODE_SM70 := -gencode arch=compute_70,code=sm_70 6 | GENCODE_SM80 := -gencode arch=compute_80,code=sm_80 -gencode arch=compute_80,code=compute_80 7 | GENCODE_SM90 := -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90,code=compute_90 8 | GENCODE_FLAGS := $(GENCODE_SM70) $(GENCODE_SM80) $(GENCODE_SM90) 9 | ifdef DISABLE_CUB 10 | NVCC_FLAGS = -Xptxas --optimize-float-atomics 11 | else 12 | NVCC_FLAGS = -DHAVE_CUB 13 | endif 14 | NVCC_FLAGS += -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 15 | jacobi: Makefile jacobi.cu 16 | $(NVCC) $(NVCC_FLAGS) jacobi.cu -o jacobi 17 | 18 | .PHONY.: clean 19 | clean: 20 | rm -f jacobi jacobi_report.nsys-rep 21 | 22 | sanitize: jacobi 23 | compute-sanitizer ./jacobi -niter 10 24 | 25 | run: jacobi 26 | ./jacobi 27 | 28 | profile: jacobi 29 | nsys profile --trace=cuda,nvtx -o jacobi ./jacobi -niter 10 30 | -------------------------------------------------------------------------------- /_basic/openmp/source_code/dcdread.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
2 | using namespace std; 3 | 4 | void dcdreadhead(int *natom, int *nframes, std::istream &infile) { 5 | 6 | infile.seekg(8,ios::beg); 7 | infile.read((char*)nframes, sizeof(int)); 8 | infile.seekg(64*4,ios::cur); 9 | infile.read((char*)natom, sizeof(int)); 10 | infile.seekg(1*8,ios::cur); 11 | return; 12 | } 13 | 14 | void dcdreadframe(double *x, double *y, double *z, std::istream &infile, 15 | int natom, double& xbox,double& ybox,double& zbox){ 16 | 17 | double d[6]; 18 | for (int i=0;i<6;i++) { 19 | infile.read((char*)&d[i], sizeof(double)); 20 | } 21 | xbox=d[0]; 22 | ybox=d[2]; 23 | zbox=d[5]; 24 | float a,b,c; 25 | infile.seekg(1*8,ios::cur); 26 | for (int i=0;i 3 | #include 4 | 5 | #define N 512 6 | 7 | void host_add(int *a, int *b, int *c) { 8 | for(int idx=0;idx>>(d_a,d_b,d_c); 50 | 51 | // Copy result back to host 52 | cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost); 53 | 54 | print_output(a,b,c); 55 | 56 | free(a); free(b); free(c); 57 | cudaFree(d_a); cudaFree(d_b); cudaFree(d_c); 58 | 59 | 60 | 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /_basic/cuda/source_code/vector_addition_gpu_thread_only.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 2 | #include 3 | #include 4 | 5 | #define N 512 6 | 7 | void host_add(int *a, int *b, int *c) { 8 | for(int idx=0;idx>>(d_a,d_b,d_c); 50 | 51 | // Copy result back to host 52 | cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost); 53 | 54 | print_output(a,b,c); 55 | 56 | free(a); free(b); free(c); 57 | cudaFree(d_a); cudaFree(d_b); cudaFree(d_c); 58 | 59 | 60 | 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /nways_Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
2 | 3 | # To build the docker container, run: $ sudo docker build -f nways_Dockerfile -t nways:cf . 4 | # To run: $ sudo docker run --rm -it --runtime nvidia -p 8888:8888 nways:cf 5 | # Finally, open http://localhost:8888/ 6 | 7 | FROM nvcr.io/nvidia/nvhpc:24.1-devel-cuda_multi-ubuntu22.04 8 | 9 | RUN apt-get -y update && \ 10 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends python3-pip python3-setuptools nginx zip make build-essential libtbb-dev python3-dev && \ 11 | rm -rf /var/lib/apt/lists/* && \ 12 | pip3 install --upgrade pip &&\ 13 | pip3 install gdown 14 | 15 | RUN apt-get update -y 16 | RUN apt-get install -y git nvidia-modprobe 17 | RUN pip3 install jupyterlab 18 | # Install required python packages 19 | RUN pip3 install ipywidgets 20 | 21 | ############################################ 22 | RUN apt-get update -y 23 | 24 | RUN git clone https://github.com/NVIDIA/nvbandwidth.git 25 | RUN cd nvbandwidth && apt update && apt install -y libboost-program-options-dev && ./debian_install.sh 26 | RUN cd .. 
27 | ############################################ 28 | RUN apt-get update -y 29 | 30 | # TO COPY the data 31 | COPY _basic/openacc/ /labs/openacc 32 | COPY _basic/openmp/ /labs/openmp 33 | COPY _basic/_common/ /labs/_common 34 | COPY _basic/iso/ /labs/iso 35 | COPY _basic/cuda/ /labs/cuda 36 | COPY _basic/_start_nways.ipynb /labs 37 | COPY _basic/memory_coherent/ /labs/memory_coherent 38 | 39 | RUN python3 /labs/_common/dataset.py 40 | 41 | ################################################# 42 | ENV PATH="/usr/local/bin:/opt/anaconda3/bin:/usr/bin:$PATH" 43 | ################################################# 44 | 45 | WORKDIR /labs 46 | CMD jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/labs 47 | -------------------------------------------------------------------------------- /_basic/cuda/source_code/vector_addition_gpu_thread_block.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 2 | #include 3 | #include 4 | 5 | #define N 512 6 | 7 | void host_add(int *a, int *b, int *c) { 8 | for(int idx=0;idx>>(d_a,d_b,d_c); 53 | 54 | // Copy result back to host 55 | cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost); 56 | 57 | print_output(a,b,c); 58 | 59 | free(a); free(b); free(c); 60 | cudaFree(d_a); cudaFree(d_b); cudaFree(d_c); 61 | 62 | 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /_scripts/nways_Singularity: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
2 | 3 | # To build the singularity container, run: $ singularity build --fakeroot nways_c.simg nways_Singularity 4 | # To copy the content of the container: $ singularity run nways_c.simg cp -rT /labs ~/labs 5 | # To run: $ singularity run --nv nways_c.simg jupyter-lab --notebook-dir=~/labs 6 | # Finally, open http://localhost:8888/ 7 | 8 | Bootstrap: docker 9 | FROM: nvcr.io/nvidia/nvhpc:23.5-devel-cuda_multi-ubuntu20.04 10 | 11 | %environment 12 | export XDG_RUNTIME_DIR= 13 | export PATH="$PATH:/usr/local/bin:/opt/anaconda3/bin:/usr/bin" 14 | 15 | %post 16 | build_tmp=$(mktemp -d) && cd ${build_tmp} 17 | 18 | apt-get -y update 19 | apt-get -y dist-upgrade 20 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends \ 21 | m4 vim-nox emacs-nox nano zip\ 22 | python3-pip python3-setuptools git-core inotify-tools \ 23 | curl git-lfs \ 24 | build-essential libtbb-dev 25 | rm -rf /var/lib/apt/cache/* 26 | 27 | pip3 install --upgrade pip 28 | pip3 install gdown 29 | apt-get update -y 30 | apt-get -y install git nvidia-modprobe 31 | pip3 install jupyterlab 32 | pip3 install ipywidgets 33 | 34 | apt-get install --no-install-recommends -y build-essential 35 | 36 | python3 /labs/_common/dataset.py 37 | 38 | apt-get update -y 39 | apt-get install --no-install-recommends -y build-essential 40 | 41 | wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 42 | bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3 43 | rm Miniconda3-latest-Linux-x86_64.sh 44 | 45 | cd / 46 | rm -rf ${build_tmp} 47 | 48 | %files 49 | ../_basic/openacc/ /labs/openacc 50 | ../_basic/openmp/ /labs/openmp 51 | ../_basic/_common/ /labs/_common 52 | ../_basic/iso/ /labs/iso 53 | ../_basic/cuda/ /labs/cuda 54 | ../_basic/_start_nways.ipynb /labs 55 | 56 | %runscript 57 | "$@" 58 | 59 | %labels 60 | AUTHOR mozhgank 61 | -------------------------------------------------------------------------------- /Repo_Structure.md: 
-------------------------------------------------------------------------------- 1 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 2 | 3 | # HPC_Bootcamp 4 | 5 | This repository contains training content for the HPC_Bootcamp materials. This repository includes the following file structure in the initial two levels: 6 | 7 | ``` 8 | │ ├── 9 | ├── _basic 10 | │ ├── cuda 11 | │ ├── iso 12 | │ ├── openacc 13 | │ ├── openmp 14 | │ └── python 15 | ├── LICENSE 16 | ├── README.md 17 | ├── nways_Dockerfile 18 | ├── nways_Dockerfile_python 19 | ├── nways_Singularity 20 | ├── nways_Singularity_python 21 | ├── CONTRIBUTING.md 22 | ├── Deployment_Guide.md 23 | ├── _scripts 24 | └── start_notebook 25 | ``` 26 | 27 | - The __basic_ directory contains all of the introductory training materials for CUDA, Standard Languages, OpenMP Offloading, and OpenACC. 28 | - The __scripts_ directory contains container definition files for each bootcamp type. This is not needed at the moment. Please refer to the [Deployment_Guide](Deployment_Guide.md). 29 | - The __start_notebook_ directory contains starter notebooks and is optional to use. This is not needed at the moment. 30 | 31 | 32 | 33 | ### Building the container using the definition files inside the `_scripts` folder 34 | 35 | To build the singularity container, run: 36 | `sudo singularity build miniapp.simg {Name of the content}_Singularity`. Alternatively, you can use `singularity build --fakeroot miniapp.simg {Name of the content}_Singularity` if you do not have `sudo` rights.
37 | 38 | Next, copy the files to a local directory to make sure changes are stored locally: 39 | `singularity run miniapp.simg cp -rT /labs ~/labs` 40 | 41 | Then, run the container: 42 | `singularity run --nv miniapp.simg jupyter-lab --notebook-dir=~/labs` 43 | 44 | Once inside the container, open the jupyter lab in browser: http://localhost:8888, and start the lab by clicking on the `_start_{Name of the content}.ipynb` notebook. 45 | 46 | -------------------------------------------------------------------------------- /nways_Singularity: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 2 | 3 | # To build the singularity container, run: $ singularity build --fakeroot nways_c.simg nways_Singularity 4 | # To copy the content of the container: $ singularity run nways_c.simg cp -rT /labs ~/labs 5 | # To run: $ singularity run --nv nways_c.simg jupyter-lab --notebook-dir=~/labs 6 | # Finally, open http://localhost:8888/ 7 | 8 | Bootstrap: docker 9 | FROM: nvcr.io/nvidia/nvhpc:24.1-devel-cuda_multi-ubuntu22.04 10 | 11 | %environment 12 | export XDG_RUNTIME_DIR= 13 | export PATH="$PATH:/usr/local/bin:/opt/anaconda3/bin:/usr/bin" 14 | 15 | %post 16 | #build_tmp=$(mktemp -d) && cd ${build_tmp} 17 | . 
/.singularity.d/env/10-docker*.sh 18 | apt-get -y update 19 | #apt-get -y dist-upgrade 20 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends \ 21 | python3-pip python3-setuptools nginx zip make build-essential libtbb-dev python3-dev\ 22 | curl git-lfs 23 | 24 | rm -rf /var/lib/apt/cache/* 25 | 26 | pip3 install --upgrade pip 27 | pip3 install gdown 28 | apt-get update -y 29 | apt-get -y install git nvidia-modprobe 30 | pip3 install jupyterlab 31 | pip3 install ipywidgets 32 | 33 | apt-get update -y 34 | 35 | git clone https://github.com/NVIDIA/nvbandwidth.git 36 | cd nvbandwidth && apt update && apt install -y libboost-program-options-dev && ./debian_install.sh 37 | cd .. 38 | 39 | ############################################ 40 | apt-get update -y 41 | 42 | python3 /labs/_common/dataset.py 43 | 44 | #wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 45 | #bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3 46 | #rm Miniconda3-latest-Linux-x86_64.sh 47 | 48 | #cd / 49 | #rm -rf ${build_tmp} 50 | 51 | %files 52 | _basic/openacc/ /labs/openacc 53 | _basic/openmp/ /labs/openmp 54 | _basic/_common/ /labs/_common 55 | _basic/iso/ /labs/iso 56 | _basic/cuda/ /labs/cuda 57 | _basic/_start_nways.ipynb /labs 58 | _basic/memory_coherent/ /labs/memory_coherent 59 | 60 | %runscript 61 | "$@" 62 | 63 | %labels 64 | AUTHOR mozhgank -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 2 | 3 | # N-ways to GPU programming 4 | The N-Ways to GPU Programming Bootcamp covers the basics of GPU programming and provides an overview of different methods for porting scientific application to GPUs using NVIDIA® CUDA®, OpenACC, standard languages, OpenMP offloading, and/or CuPy and Numba. 
Throughout the bootcamp, attendees will learn how to analyze GPU-enabled applications using NVIDIA Nsight™ Systems and participate in hands-on activities to apply these learned skills to real-world problems. 5 | 6 | ## Bootcamp contents: 7 | 8 | The content is structured in multiple options covering the following: 9 | 10 | - Option 1: N-Ways to GPU Programming-C-Fortran 11 | - NVIDIA® Nsight™ Systems 12 | - NVIDIA® Nsight™ Compute (Optional, lecture only) 13 | - Lab 1: ISO C++ and ISO Fortran 14 | - Lab 2: OpenACC 15 | - Lab 3: OpenMP offloading 16 | - Lab 4: CUDA 17 | - Lab 5: Memory Coherent Architectures 18 | 19 | - Option 2: N-Ways to GPU Programming-Python 20 | - NVIDIA® Nsight™ Systems 21 | - NVIDIA® Nsight™ Compute (Optional, lecture only) 22 | - Lab 1: CuPy 23 | - Lab 2: Numba 24 | 25 | ## Tools and frameworks: 26 | 27 | The tools and frameworks used in the bootcamp are as follows: 28 | - [NVIDIA HPC SDK](https://developer.nvidia.com/hpc-sdk) 29 | - [NVIDIA Nsight™ Systems](https://developer.nvidia.com/nsight-systems) 30 | 31 | ## Bootcamp duration: 32 | 33 | The N-Ways to GPU Programming-C-Fortran Bootcamp will take approximately 6 hours and the N-Ways to GPU Programming-Python Bootcamp will take approximately 4.5 hours. 34 | 35 | ## Bootcamp prerequisites: 36 | 37 | Basic experience with C/C++ or Fortran is needed for the "N-Ways to GPU Programming-C-Fortran" Bootcamp and Python is needed for the "N-Ways to GPU Programming-Python" Bootcamp. No GPU programming experience is required. 38 | 39 | ## Deploying the Bootcamp materials: 40 | 41 | For deploying the materials, please refer to the Deployment guide present [here](Deployment_Guide.md). 42 | 43 | ## Attribution 44 | 45 | This material originates from the OpenHackathons GitHub repository. Check out additional materials [here](https://github.com/openhackathons-org).
46 | 47 | Don't forget to check out additional [Open Hackathons Resources](https://www.openhackathons.org/s/technical-resources) and join our [OpenACC and Hackathons Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community. 48 | 49 | ## Licensing 50 | 51 | Copyright © 2023 OpenACC-Standard.org. This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials may include references to hardware and software developed by other entities; all applicable licensing and copyrights apply. 52 | -------------------------------------------------------------------------------- /_scripts/nways_Dockerfile_python: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 2 | 3 | # To build the docker container, run: $ sudo docker build -f nways_Dockerfile_python -t nways:p . 
4 | # To run: $ sudo docker run --rm -it --runtime nvidia -p 8888:8888 nways:p 5 | # Finally, open http://localhost:8888/ 6 | 7 | #FROM nvidia/cuda:11.2.2-devel-ubuntu20.04 8 | FROM nvcr.io/nvidia/cuda:11.4.2-devel-ubuntu20.04 9 | 10 | ##### 11 | # Read https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772 12 | RUN apt-key del 7fa2af80 13 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub 14 | ##### 15 | 16 | RUN apt-get -y update && \ 17 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends \ 18 | python3-dev \ 19 | python3-pip python3-setuptools nginx zip make build-essential libtbb-dev && \ 20 | rm -rf /var/lib/apt/lists/* 21 | 22 | RUN pip3 install --no-cache-dir -U install setuptools pip 23 | RUN pip3 install gdown 24 | RUN apt-get update -y 25 | RUN apt-get install -y git nvidia-modprobe 26 | # Install required python packages 27 | RUN pip3 install jupyterlab 28 | RUN pip3 install ipywidgets 29 | #RUN pip3 install --upgrade numpy==1.19.5 30 | RUN pip3 install --upgrade numpy==1.21.1 31 | #RUN pip3 install --no-cache-dir "cupy-cuda112==9.0.0" \ 32 | RUN pip3 install --no-cache-dir "cupy-cuda114==10.3.1" \ 33 | numba==0.53.1 scipy 34 | 35 | ############################################ 36 | # NVIDIA nsight-systems-cli-2022.1.1, nsight-compute-2022.1.1 37 | RUN apt-get update -y && \ 38 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 39 | apt-transport-https \ 40 | ca-certificates \ 41 | gnupg \ 42 | wget && \ 43 | #apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 && \ 44 | wget -qO - https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/nvidia.pub | apt-key add - &&\ 45 | echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list &&\ 46 | apt-get update -y 47 | 48 | RUN 
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-cli-2022.1.1 nsight-compute-2022.1.1 49 | 50 | # TO COPY the data 51 | COPY python/ /labs/python 52 | COPY _common/ /labs/_common 53 | COPY _start_nways.ipynb /labs 54 | 55 | RUN python3 /labs/_common/dataset_python.py 56 | 57 | ################################################# 58 | ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib/python3.8/dist-packages:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" 59 | ENV PATH="/opt/nvidia/nsight-systems/2022.1.1/bin:/opt/nvidia/nsight-compute/2022.1.1:/usr/local/bin:/bin:/usr/local/cuda/bin:/usr/bin${PATH:+:${PATH}}" 60 | 61 | RUN pip3 install --no-cache-dir MDAnalysis 62 | 63 | #ADD nways_labs/ /labs 64 | WORKDIR /labs 65 | CMD jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/labs 66 | -------------------------------------------------------------------------------- /nways_Dockerfile_python: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 2 | 3 | # To build the docker container, run: $ sudo docker build -f nways_Dockerfile_python -t nways:p . 
4 | # To run: $ sudo docker run --rm -it --runtime nvidia -p 8888:8888 nways:p 5 | # Finally, open http://localhost:8888/ 6 | 7 | #FROM nvidia/cuda:11.2.2-devel-ubuntu20.04 8 | FROM nvcr.io/nvidia/cuda:11.4.2-devel-ubuntu20.04 9 | 10 | ##### 11 | # Read https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772 12 | RUN apt-key del 7fa2af80 13 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub 14 | ##### 15 | 16 | RUN apt-get -y update && \ 17 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends \ 18 | python3-dev \ 19 | python3-pip python3-setuptools nginx zip make build-essential libtbb-dev && \ 20 | rm -rf /var/lib/apt/lists/* 21 | 22 | RUN pip3 install --no-cache-dir -U install setuptools pip 23 | RUN pip3 install gdown 24 | RUN apt-get update -y 25 | RUN apt-get install -y git nvidia-modprobe 26 | # Install required python packages 27 | RUN pip3 install jupyterlab 28 | RUN pip3 install ipywidgets 29 | #RUN pip3 install --upgrade numpy==1.19.5 30 | RUN pip3 install --upgrade numpy==1.21.1 31 | #RUN pip3 install --no-cache-dir "cupy-cuda112==9.0.0" \ 32 | RUN pip3 install --no-cache-dir "cupy-cuda114==10.3.1" \ 33 | numba==0.53.1 scipy 34 | 35 | ############################################ 36 | # NVIDIA nsight-systems-cli-2022.1.1, nsight-compute-2022.1.1 37 | RUN apt-get update -y && \ 38 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 39 | apt-transport-https \ 40 | ca-certificates \ 41 | gnupg \ 42 | wget && \ 43 | #apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 && \ 44 | wget -qO - https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/nvidia.pub | apt-key add - &&\ 45 | echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list &&\ 46 | apt-get update -y 47 | 48 | RUN 
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-cli-2022.1.1 nsight-compute-2022.1.1 49 | 50 | # TO COPY the data 51 | COPY _basic/python/ /labs/python 52 | COPY _basic/_common/ /labs/_common 53 | COPY _basic/_start_nways.ipynb /labs 54 | 55 | RUN python3 /labs/_common/dataset_python.py 56 | 57 | ################################################# 58 | ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib/python3.8/dist-packages:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" 59 | ENV PATH="/opt/nvidia/nsight-systems/2022.1.1/bin:/opt/nvidia/nsight-compute/2022.1.1:/usr/local/bin:/bin:/usr/local/cuda/bin:/usr/bin${PATH:+:${PATH}}" 60 | 61 | RUN pip3 install --no-cache-dir MDAnalysis 62 | 63 | #ADD nways_labs/ /labs 64 | WORKDIR /labs 65 | CMD jupyter-lab --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/labs 66 | -------------------------------------------------------------------------------- /_basic/_start_nways.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## N Ways to GPU Programming\n", 8 | "\n", 9 | "## Learning Objectives\n", 10 | "With the release of NVIDIA CUDA® in 2007, different approaches to GPU programming have evolved. Each approach has its own advantages and disadvantages. By the end of this bootcamp session, participants will have a broader perspective on GPU programming approaches to help them select a programming model that better fits their application's needs and constraints. 
The bootcamp will teach how to accelerate a real-world scientific application using the following methods:\n", 11 | "* Standard: C++ stdpar, Fortran Do-Concurrent\n", 12 | "* Directives: OpenACC, OpenMP\n", 13 | "\n", 14 | "* Programming Language Extension: CUDA C, CUDA Fortran, Python CuPy, Python Numba\n", 15 | "\n", 16 | "Let's start by testing the CUDA Driver and GPU you are running the code on in this lab:" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "!nvidia-smi" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Bootcamp Outline\n", 33 | "\n", 34 | "During this lab, we will be porting mini-applications in the Molecular Simulation (MD) domain to GPUs. You can choose to work with either version of this application. Please click on one of the below links to start N Ways to GPU Programming in **MD** for:\n", 35 | "\n", 36 | "- [C and Fortran](_common/_start_nways_C_Fortran.ipynb) \n", 37 | "- [Python](_common/_start_nways_python.ipynb) " 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Bootcamp Duration\n", 45 | "The lab material will be presented in an 8-hour session. A link to the material is available for download at the end of the lab.\n", 46 | "\n", 47 | "### Content Level\n", 48 | "Beginner, Intermediate\n", 49 | "\n", 50 | "### Target Audience and Prerequisites\n", 51 | "The target audience for this lab is researchers, graduate students and developers interested in learning about various ways of GPU programming to accelerate their scientific applications.\n", 52 | "\n", 53 | "Basic experience with C/C++, Python, or Fortran programming is needed. No GPU programming experience is required. \n", 54 | "\n", 55 | "--- \n", 56 | "\n", 57 | "## Licensing \n", 58 | "\n", 59 | "Copyright © 2022 OpenACC-Standard.org.
This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). These materials may include references to hardware and software developed by other entities; all applicable licensing and copyrights apply." 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 3", 66 | "language": "python", 67 | "name": "python3" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 3 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython3", 79 | "version": "3.7.4" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 4 84 | } 85 | -------------------------------------------------------------------------------- /nways_Singularity_python: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
2 | 3 | # To build the singularity container, run: $ singularity build --fakeroot nways_p.simg nways_Singularity_python 4 | # To copy the content of the container: $ singularity run nways_p.simg cp -rT /labs ~/labs 5 | # To run: $ singularity run --nv nways_p.simg jupyter-lab --notebook-dir=~/labs 6 | # Finally, open http://localhost:8888/ 7 | 8 | Bootstrap: docker 9 | #FROM: nvidia/cuda:11.2.2-devel-ubuntu20.04 10 | FROM: nvcr.io/nvidia/cuda:11.4.2-devel-ubuntu20.04 11 | 12 | %environment 13 | export XDG_RUNTIME_DIR= 14 | export PATH="$PATH:/usr/local/bin:/usr/bin" 15 | export PATH=/opt/nvidia/nsight-systems/2022.1.1/bin:/opt/nvidia/nsight-compute/2022.1.1:/bin:/usr/local/cuda/bin:$PATH # Nsight and CUDA tool directories are searched before the inherited PATH entries 16 | export LD_LIBRARY_PATH="/usr/include/python3.8:/usr/local/lib:/usr/local/lib/python3.8/dist-packages:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" 17 | 18 | 19 | %post 20 | build_tmp=$(mktemp -d) && cd ${build_tmp} 21 | 22 | ##### 23 | # Read https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772 24 | apt-key del 7fa2af80 25 | apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub 26 | ##### 27 | 28 | apt-get -y update 29 | apt-get -y dist-upgrade 30 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends python3-dev \ 31 | m4 vim-nox emacs-nox nano zip \ 32 | python3-pip python3-setuptools nginx zip make build-essential libtbb-dev 33 | rm -rf /var/lib/apt/cache/* 34 | # Upgrade the Python packaging tools before installing the rest of the Python stack 35 | pip3 install --no-cache-dir -U setuptools pip 36 | apt-get -y update 37 | apt-get -y install git nvidia-modprobe 38 | pip3 install 'chardet>=3.0.2,<3.1.0' 'idna>=2.5,<2.8' 'urllib3>=1.21.1,<1.24' 'certifi>=2017.4.17' 39 | pip3 install jupyterlab 40 | pip3 install ipywidgets 41 | pip3 install gdown 42 | pip3 install --upgrade numpy==1.21.1 43 | # pip3 install --upgrade numpy==1.19.5 44 | #pip3 install --no-cache-dir "cupy-cuda112==9.0.0" \ 45 | pip3 install --no-cache-dir 
"cupy-cuda114==10.3.1" \ 46 | numba==0.53.1 scipy 47 | 48 | #apt-get install --no-install-recommends -y build-essential 49 | # Run the dataset helper that %files places under /labs/_common (presumably fetches the lab input data -- TODO confirm) 50 | python3 /labs/_common/dataset_python.py 51 | # Create RDF.dat and Pair_entropy.dat in the cupy notebook directory (empty if they do not exist yet) 52 | touch /labs/python/jupyter_notebook/cupy/RDF.dat 53 | touch /labs/python/jupyter_notebook/cupy/Pair_entropy.dat 54 | # Register the NVIDIA devtools apt repository and its signing key, then install the pinned Nsight packages 55 | # NVIDIA nsight-systems-cli-2022.1.1, nsight-compute-2022.1.1 56 | apt-get update -y 57 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget 58 | # apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 59 | wget -qO - https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/nvidia.pub | apt-key add - 60 | echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list 61 | apt-get update -y 62 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-cli-2022.1.1 nsight-compute-2022.1.1 63 | #rm -rf /var/lib/apt/lists/* 64 | 65 | 66 | 67 | apt-get install --no-install-recommends -y build-essential 68 | 69 | pip3 install --no-cache-dir MDAnalysis 70 | # Grant read/write/execute on the two output files to every user 71 | chmod -R 777 /labs/python/jupyter_notebook/cupy/RDF.dat 72 | chmod -R 777 /labs/python/jupyter_notebook/cupy/Pair_entropy.dat 73 | # Leave and delete the temporary directory held in build_tmp 74 | cd / 75 | rm -rf ${build_tmp} 76 | 77 | %files 78 | _basic/python/ /labs/python 79 | _basic/_common/ /labs/_common 80 | _basic/_start_nways.ipynb /labs 81 | 82 | %runscript 83 | "$@" # execute the arguments passed to the container as the command line 84 | 85 | %labels 86 | AUTHOR Tosin, Mozhgan 87 | -------------------------------------------------------------------------------- /_scripts/nways_Singularity_python: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 
2 | 3 | # To build the singularity container, run: $ singularity build --fakeroot nways_p.simg nways_Singularity_python 4 | # To copy the content of the container: $ singularity run nways_p.simg cp -rT /labs ~/labs 5 | # To run: $ singularity run --nv nways_p.simg jupyter-lab --notebook-dir=~/labs 6 | # Finally, open http://localhost:8888/ 7 | 8 | Bootstrap: docker 9 | #FROM: nvidia/cuda:11.2.2-devel-ubuntu20.04 10 | FROM: nvcr.io/nvidia/cuda:11.4.2-devel-ubuntu20.04 11 | 12 | %environment 13 | export XDG_RUNTIME_DIR= 14 | export PATH="$PATH:/usr/local/bin:/usr/bin" 15 | export PATH=/opt/nvidia/nsight-systems/2022.1.1/bin:/opt/nvidia/nsight-compute/2022.1.1:/bin:/usr/local/cuda/bin:$PATH # Nsight and CUDA tool directories are searched before the inherited PATH entries 16 | export LD_LIBRARY_PATH="/usr/include/python3.8:/usr/local/lib:/usr/local/lib/python3.8/dist-packages:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" 17 | 18 | 19 | %post 20 | build_tmp=$(mktemp -d) && cd ${build_tmp} 21 | 22 | ##### 23 | # Read https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772 24 | apt-key del 7fa2af80 25 | apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub 26 | ##### 27 | 28 | apt-get -y update 29 | apt-get -y dist-upgrade 30 | DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends python3-dev \ 31 | m4 vim-nox emacs-nox nano zip \ 32 | python3-pip python3-setuptools nginx zip make build-essential libtbb-dev 33 | rm -rf /var/lib/apt/cache/* 34 | # Upgrade the Python packaging tools before installing the rest of the Python stack 35 | pip3 install --no-cache-dir -U setuptools pip 36 | apt-get -y update 37 | apt-get -y install git nvidia-modprobe 38 | pip3 install 'chardet>=3.0.2,<3.1.0' 'idna>=2.5,<2.8' 'urllib3>=1.21.1,<1.24' 'certifi>=2017.4.17' 39 | pip3 install jupyterlab 40 | pip3 install ipywidgets 41 | pip3 install gdown 42 | pip3 install --upgrade numpy==1.21.1 43 | # pip3 install --upgrade numpy==1.19.5 44 | #pip3 install --no-cache-dir "cupy-cuda112==9.0.0" \ 45 | pip3 install --no-cache-dir 
"cupy-cuda114==10.3.1" \ 46 | numba==0.53.1 scipy 47 | 48 | #apt-get install --no-install-recommends -y build-essential 49 | # Run the dataset helper that %files places under /labs/_common (presumably fetches the lab input data -- TODO confirm) 50 | python3 /labs/_common/dataset_python.py 51 | # Create RDF.dat and Pair_entropy.dat in the cupy notebook directory (empty if they do not exist yet) 52 | touch /labs/python/jupyter_notebook/cupy/RDF.dat 53 | touch /labs/python/jupyter_notebook/cupy/Pair_entropy.dat 54 | # Register the NVIDIA devtools apt repository and its signing key, then install the pinned Nsight packages 55 | # NVIDIA nsight-systems-cli-2022.1.1, nsight-compute-2022.1.1 56 | apt-get update -y 57 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget 58 | # apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 59 | wget -qO - https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/nvidia.pub | apt-key add - 60 | echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list 61 | apt-get update -y 62 | DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-cli-2022.1.1 nsight-compute-2022.1.1 63 | #rm -rf /var/lib/apt/lists/* 64 | 65 | 66 | 67 | apt-get install --no-install-recommends -y build-essential 68 | 69 | pip3 install --no-cache-dir MDAnalysis 70 | # Grant read/write/execute on the two output files to every user 71 | chmod -R 777 /labs/python/jupyter_notebook/cupy/RDF.dat 72 | chmod -R 777 /labs/python/jupyter_notebook/cupy/Pair_entropy.dat 73 | # Leave and delete the temporary directory held in build_tmp 74 | cd / 75 | rm -rf ${build_tmp} 76 | 77 | %files 78 | ../_basic/python/ /labs/python 79 | ../_basic/_common/ /labs/_common 80 | ../_basic/_start_nways.ipynb /labs 81 | 82 | %runscript 83 | "$@" # execute the arguments passed to the container as the command line 84 | 85 | %labels 86 | AUTHOR Tosin, Mozhgan 87 | -------------------------------------------------------------------------------- /Deployment_Guide.md: -------------------------------------------------------------------------------- 1 | # N-ways to GPU programming Deployment Guide 2 | The N-Ways to GPU Programming Bootcamp covers the basics of GPU programming and provides an overview of different methods for porting scientific application to GPUs using NVIDIA® CUDA®, OpenACC, standard languages, 
OpenMP offloading, and/or CuPy and Numba. Throughout the bootcamp, attendees will learn how to analyze GPU-enabled applications using NVIDIA Nsight™ Systems and participate in hands-on activities to apply these learned skills to real-world problems. 3 | 4 | ## Deploying the materials 5 | 6 | ### Prerequisites 7 | To run this tutorial, you will need a machine with an NVIDIA GPU. 8 | 9 | - Install the latest [Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) or [Singularity](https://sylabs.io/docs/). 10 | 11 | - Install the latest [NVIDIA Nsight™ Systems](https://developer.nvidia.com/nsight-systems). 12 | 13 | - The base containers required for the lab may require users to create an NGC account and generate an API key (https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account) 14 | 15 | The material has also been tested on NVIDIA V100 and T4 GPUs. Please contact us if you require assistance in deploying the content. 16 | 17 | 18 | ### Tested environment 19 | 20 | These materials were tested with both Docker and Singularity on both NVIDIA A100 and H100 GPUs in an x86-64 platform installed with a driver version of `550.90.07`. 21 | 22 | ### Deploying with container 23 | 24 | These materials can be deployed with either a Docker or a Singularity container; refer to the respective sections for the instructions. 25 | 26 | #### Docker Container 27 | 28 | To build a docker container, specify the dockerfile name using the `-f` flag: 29 | `sudo docker build -f DOCKERFILE_NAME -t IMAGE_NAME:TAG .` 30 | 31 | For instance: 32 | 33 | - To build the docker container, for N-Ways to GPU Programming-Python, follow the below steps: 34 | 35 | 1. `sudo docker build -f nways_Dockerfile_python -t openhackathons:nways_python .` 36 | 2. `sudo docker run --rm -it --gpus=all -p 8888:8888 openhackathons:nways_python` 37 | 3. To access the labs, run: `jupyter-lab --ip 0.0.0.0 --port 8888 --no-browser --allow-root` 38 | 4. 
Now, open the jupyter lab in browser: http://localhost:8888, and start working on the lab by clicking on the `_start_nways.ipynb` notebook 39 | 40 | 41 | - To build the docker container, for N-Ways to GPU Programming-C-Fortran, follow the below steps: 42 | 43 | 1. `sudo docker build -f nways_Dockerfile -t openhackathons:nways_CFortran .` 44 | 2. `sudo docker run --rm -it --gpus=all -p 8888:8888 openhackathons:nways_CFortran` 45 | 3. To access the labs, run: `jupyter-lab --ip 0.0.0.0 --port 8888 --no-browser --allow-root` 46 | 4. Now, open the jupyter lab in browser: http://localhost:8888, and start working on the lab by clicking on the `_start_nways.ipynb` notebook 47 | 48 | Please note that if you run both containers at the same time, you will need to map them to different ports to access them separately. 49 | 50 | #### Singularity Container 51 | 52 | - To build the singularity container, for N-Ways to GPU Programming-Python, follow the below steps: 53 | 54 | 1. `singularity build --fakeroot nways_python.simg nways_Singularity_python` 55 | 2. `singularity run nways_python.simg cp -rT /labs ~/labs` 56 | 3. `singularity run --nv nways_python.simg jupyter-lab --notebook-dir=~/labs` 57 | 4. Now, open the jupyter lab in browser: http://localhost:8888, and start working on the lab by clicking on the `_start_nways.ipynb` notebook 58 | 59 | 60 | - To build the singularity container, for N-Ways to GPU Programming-C-Fortran, follow the below steps: 61 | 62 | 1. `singularity build --fakeroot nways_CFortran.simg nways_Singularity` 63 | 2. `singularity run nways_CFortran.simg cp -rT /labs ~/labs` 64 | 3. `singularity run --nv nways_CFortran.simg jupyter-lab --notebook-dir=~/labs` 65 | 4. 
Now, open the jupyter lab in browser: http://localhost:8888, and start working on the lab by clicking on the `_start_nways.ipynb` notebook 66 | 67 | ### Known issues 68 | 69 | - Please go through the list of existing bugs/issues or file a new issue at [GitHub](https://github.com/openhackathons-org/nways_accelerated_programming/issues). 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /_basic/iso/source_code/SOLUTION/rdf.f90: -------------------------------------------------------------------------------- 1 | !///////////////////////////////////////////////////////////////////////////////////////// 2 | !// Author: Manish Agarwal and Gourav Shrivastava , IIT Delhi 3 | !///////////////////////////////////////////////////////////////////////////////////////// 4 | 5 | ! Copyright (c) 2021 NVIDIA Corporation. All rights reserved. 6 | 7 | module readdata 8 | contains 9 | subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes) 10 | integer i,j 11 | integer maxframes,maxatoms 12 | 13 | double precision d(6),xbox,ybox,zbox 14 | real*4, allocatable :: x(:,:) 15 | real*4, allocatable :: y(:,:) 16 | real*4, allocatable :: z(:,:) 17 | 18 | real*4 dummyr 19 | integer*4 nset, natoms, dummyi,nframes,tframes 20 | character*4 dummyc 21 | 22 | open(10,file='../../_common/input/alk.traj.dcd',status='old',form='unformatted') 23 | read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9) 24 | read(10) dummyi, dummyr,dummyr 25 | read(10) natoms 26 | print*,"Total number of frames and atoms are",tframes,natoms 27 | 28 | allocate ( x(natoms,maxframes) ) 29 | allocate ( y(natoms,maxframes) ) 30 | allocate ( z(natoms,maxframes) ) 31 | 32 | do j = 1,nframes 33 | read(10) (d(i),i=1, 6) 34 | 35 | read(10) (x(i,j),i=1,natoms) 36 | read(10) (y(i,j),i=1,natoms) 37 | read(10) (z(i,j),i=1,natoms) 38 | end do 39 | 40 | xbox=d(1) 41 | ybox=d(3) 42 | zbox=d(6) 43 | 44 | print*,"File reading is done: 
xbox,ybox,zbox",xbox,ybox,zbox 45 | close(10) ! all requested frames are read; release unit 10 (control returns at end subroutine) 46 | 47 | end subroutine readdcd 48 | end module readdata 49 | 50 | program rdf 51 | use readdata 52 | use nvtx 53 | implicit none 54 | integer n,i,j,iconf,ind 55 | integer natoms,nframes,nbin 56 | integer maxframes,maxatoms 57 | parameter (maxframes=10,maxatoms=60000,nbin=2000) 58 | real*4, allocatable :: x(:,:) 59 | real*4, allocatable :: y(:,:) 60 | real*4, allocatable :: z(:,:) 61 | double precision dx,dy,dz 62 | double precision xbox,ybox,zbox,cut 63 | double precision vol,r,del,s2,s2bond 64 | double precision, allocatable :: g(:) 65 | double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf 66 | double precision rlower,rupper 67 | character atmnm*4 68 | real*4 start,finish 69 | 70 | open(23,file='RDF.dat',status='unknown') 71 | open(24,file='Pair_entropy.dat',status='unknown') 72 | 73 | nframes=10 74 | 75 | call cpu_time(start) 76 | 77 | print*,"Going to read coordinates" 78 | call nvtxStartRange("Read File") 79 | call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes) 80 | call nvtxEndRange 81 | 82 | allocate ( g(nbin) ) 83 | g = 0.0d0 84 | 85 | pi=dacos(-1.0d0) 86 | vol=xbox*ybox*zbox 87 | rho=dble(natoms)/vol 88 | 89 | del=xbox/dble(2.0*nbin) 90 | write(*,*) "bin width is : ",del 91 | cut = dble(xbox * 0.5); 92 | 93 | !pair calculation 94 | call nvtxStartRange("Pair Calculation") 95 | do iconf=1,nframes 96 | if (mod(iconf,1).eq.0) print*,iconf 97 | do concurrent(i=1:natoms, j=1:natoms) 98 | dx=x(i,iconf)-x(j,iconf) 99 | dy=y(i,iconf)-y(j,iconf) 100 | dz=z(i,iconf)-z(j,iconf) 101 | 102 | dx=dx-nint(dx/xbox)*xbox 103 | dy=dy-nint(dy/ybox)*ybox 104 | dz=dz-nint(dz/zbox)*zbox 105 | 106 | r=dsqrt(dx**2+dy**2+dz**2) 107 | if(r