├── .dockerignore ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── amd64_10-1.dockerfile ├── ci ├── build.sh ├── env.sh └── install_deps.sh ├── coalescing ├── CMakeLists.txt ├── README.md ├── common.hpp ├── include │ └── argparse │ │ └── argparse.hpp ├── main.cu └── rai_build.yml ├── ppc64le_10-1.dockerfile ├── sgemm ├── 1_1_pinned_basic.cu ├── 1_2_pinned_tiled.cu ├── 1_3_pinned_joint.cu ├── 2_1_pageable_basic.cu ├── 2_2_pinned_basic.cu ├── 2_3_pinned_tiled.cu ├── 2_4_pinned_tiled_overlap.cu ├── 2_5_pinned_joint.cu ├── 2_6_pinned_joint_overlap.cu ├── CMakeLists.txt ├── README.md ├── common.hpp ├── cpu.cpp ├── include │ └── argparse │ │ └── argparse.hpp └── rai_build.yml ├── slides ├── 20200416_ece408.pdf ├── 20200421_ece498_Nsight.pdf ├── GEMM-joint-tiling.ppt ├── memory_access_efficiency.pdf └── s22141-what-the-profiler-is-telling-you-how-to-get-the-most-performance-out-of-your-hardware.pdf └── test.cu /.dockerignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | sgemm/build 2 | *.deb 3 | *.run -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/.travis.yml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/README.md -------------------------------------------------------------------------------- /amd64_10-1.dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/amd64_10-1.dockerfile -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/ci/build.sh -------------------------------------------------------------------------------- /ci/env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/ci/env.sh -------------------------------------------------------------------------------- /ci/install_deps.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/ci/install_deps.sh -------------------------------------------------------------------------------- /coalescing/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/coalescing/CMakeLists.txt -------------------------------------------------------------------------------- /coalescing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/coalescing/README.md -------------------------------------------------------------------------------- /coalescing/common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/coalescing/common.hpp -------------------------------------------------------------------------------- /coalescing/include/argparse/argparse.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/coalescing/include/argparse/argparse.hpp -------------------------------------------------------------------------------- /coalescing/main.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/coalescing/main.cu -------------------------------------------------------------------------------- /coalescing/rai_build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/coalescing/rai_build.yml -------------------------------------------------------------------------------- /ppc64le_10-1.dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/ppc64le_10-1.dockerfile -------------------------------------------------------------------------------- /sgemm/1_1_pinned_basic.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/1_1_pinned_basic.cu -------------------------------------------------------------------------------- /sgemm/1_2_pinned_tiled.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/1_2_pinned_tiled.cu -------------------------------------------------------------------------------- /sgemm/1_3_pinned_joint.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/1_3_pinned_joint.cu -------------------------------------------------------------------------------- /sgemm/2_1_pageable_basic.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/2_1_pageable_basic.cu -------------------------------------------------------------------------------- /sgemm/2_2_pinned_basic.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/2_2_pinned_basic.cu -------------------------------------------------------------------------------- /sgemm/2_3_pinned_tiled.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/2_3_pinned_tiled.cu -------------------------------------------------------------------------------- /sgemm/2_4_pinned_tiled_overlap.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/2_4_pinned_tiled_overlap.cu -------------------------------------------------------------------------------- /sgemm/2_5_pinned_joint.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/2_5_pinned_joint.cu -------------------------------------------------------------------------------- /sgemm/2_6_pinned_joint_overlap.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/2_6_pinned_joint_overlap.cu -------------------------------------------------------------------------------- /sgemm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/CMakeLists.txt -------------------------------------------------------------------------------- /sgemm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/README.md -------------------------------------------------------------------------------- /sgemm/common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/common.hpp -------------------------------------------------------------------------------- /sgemm/cpu.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/cpu.cpp -------------------------------------------------------------------------------- /sgemm/include/argparse/argparse.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/include/argparse/argparse.hpp -------------------------------------------------------------------------------- /sgemm/rai_build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/sgemm/rai_build.yml -------------------------------------------------------------------------------- /slides/20200416_ece408.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/slides/20200416_ece408.pdf -------------------------------------------------------------------------------- /slides/20200421_ece498_Nsight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/slides/20200421_ece498_Nsight.pdf -------------------------------------------------------------------------------- /slides/GEMM-joint-tiling.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/slides/GEMM-joint-tiling.ppt -------------------------------------------------------------------------------- /slides/memory_access_efficiency.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/slides/memory_access_efficiency.pdf -------------------------------------------------------------------------------- /slides/s22141-what-the-profiler-is-telling-you-how-to-get-the-most-performance-out-of-your-hardware.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/slides/s22141-what-the-profiler-is-telling-you-how-to-get-the-most-performance-out-of-your-hardware.pdf -------------------------------------------------------------------------------- /test.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cwpearson/nvidia-performance-tools/HEAD/test.cu --------------------------------------------------------------------------------