├── .gitignore ├── .vscode └── settings.json ├── 01_Deep_Learning_Ecosystem ├── README.md └── assets │ ├── onnx.png │ ├── speed.png │ └── wandb.png ├── 02_Setup ├── README.md └── cuda-installer.sh ├── 03_C_and_C++_Review ├── .vscode │ └── settings.json ├── 01 Pointers │ ├── 01.c │ ├── 02.c │ ├── 03.c │ ├── 04.c │ ├── 05.c │ └── 06.c ├── 02 Custom Types │ ├── 01.c │ ├── 02.c │ └── 02.cpp ├── 03 Type Casting │ ├── 01.c │ └── README.md ├── 04 Macros & Global Vars │ └── 01.c ├── 05 Compilers │ └── README.md ├── 06 Makefiles │ ├── 01.c │ ├── 02.c │ ├── 03.cu │ ├── Makefile │ └── README.md ├── 07 Debuggers │ └── README.md ├── README.md ├── optional │ ├── 01.c │ ├── 02.c │ └── README.md └── pointers-simple.png ├── 04_Gentle_Intro_to_GPUs ├── README.md └── assets │ ├── cpu-vs-gpu.png │ ├── cpu.png │ ├── fpga.png │ ├── gpu.png │ ├── history01.png │ ├── history02.png │ ├── history03.png │ └── tpu.png ├── 05_Writing_your_First_Kernels ├── 01 CUDA Basics │ ├── 01_idxing.cu │ ├── Basics of Basics.md │ └── README.md ├── 02 Kernels │ ├── 00_vector_add_v1.cu │ ├── 01_vector_add_v2.cu │ ├── 02 matmul.cu │ └── README.md ├── 03 Profiling │ ├── 00 nvtx_matmul.cu │ ├── 01_naive_matmul.cu │ ├── 02_tiled_matmul.cu │ └── README.md ├── 04 Atomics │ ├── 00_atomicAdd.cu │ └── README.md ├── 05 Streams │ ├── 01_stream_basics.cu │ ├── 02_stream_advanced.cu │ └── README.md ├── README.md └── assets │ ├── SMs.png │ ├── barrier.png │ ├── bitshift1.png │ ├── cuda-square.png │ ├── cudamemcpy.png │ ├── cudasquare2.png │ ├── cudasquare3.png │ ├── dimidx.png │ ├── exampleknls.png │ ├── gpustats.png │ ├── idxing.png │ ├── lockstep.png │ ├── membandwidth.png │ ├── memhierarchy.png │ ├── memmodel.png │ ├── nsight-ui.png │ ├── prof1.png │ ├── prof2.png │ ├── prof3.png │ ├── schedulers.png │ ├── squareknl.png │ ├── threadblockgrid.png │ ├── triton1.png │ ├── triton2.png │ └── weft.png ├── 06_CUDA_APIs ├── 01 CUBLAS │ ├── 01 cuBLAS │ │ └── 01_Hgemm_Sgemm.cu │ ├── 02 cuBLASLt │ │ ├── 01_LtMatmul.cu │ │ ├── 
02_compare.cu │ │ └── README.md │ ├── 03 cuBLASXt │ │ ├── 01_demo.cu │ │ └── 02_compare.cu │ └── README.md ├── 02 CUDNN │ ├── 00 Tanh.cu │ ├── 00 torch-compare.py │ ├── 01 Conv2d_NCHW.cu │ ├── 01 torch-compare.py │ ├── 02 compare_conv.cu │ └── README.md ├── 03 Larger Rigs or Datacenters │ └── README.md ├── README.md ├── assets │ ├── blas.png │ ├── cublas-vs-cublasxt.png │ ├── flashattn.png │ ├── knlfusion1.png │ └── knlfusion2.png └── optional │ ├── CUFFT │ └── README.md │ ├── CURAND │ └── README.md │ └── CUTLASS │ ├── README.md │ └── compare.cu ├── 07_Faster_Matmul ├── README.md ├── assets │ ├── caching.excalidraw │ ├── caching.png │ ├── comparison.excalidraw │ ├── comparison.png │ ├── kernel3_1.png │ ├── kernel3_2.png │ └── memoryspeeds.png └── unrolling_example.cu ├── 08_Triton ├── 01_vec_add.py ├── 02_softmax.c ├── 02_softmax.cu ├── 02_softmax.py └── README.md ├── 09_PyTorch_Extensions ├── README.md ├── polynomial_activation.py ├── polynomial_cuda.cu └── setup.py ├── 10_Final_Project ├── README.md └── assets │ ├── architecture.excalidraw │ ├── architecture.png │ └── mnist.png ├── 11_Extras ├── README.md └── assets │ ├── 8t-sram-cell.png │ ├── async.png │ ├── cheatsheet.md │ ├── dram-cell.png │ ├── nvcc.png │ └── sram-cell.png └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | 09_PyTorch_Extensions/build -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /01_Deep_Learning_Ecosystem/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/01_Deep_Learning_Ecosystem/README.md 
-------------------------------------------------------------------------------- /01_Deep_Learning_Ecosystem/assets/onnx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/01_Deep_Learning_Ecosystem/assets/onnx.png -------------------------------------------------------------------------------- /01_Deep_Learning_Ecosystem/assets/speed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/01_Deep_Learning_Ecosystem/assets/speed.png -------------------------------------------------------------------------------- /01_Deep_Learning_Ecosystem/assets/wandb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/01_Deep_Learning_Ecosystem/assets/wandb.png -------------------------------------------------------------------------------- /02_Setup/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/02_Setup/README.md -------------------------------------------------------------------------------- /02_Setup/cuda-installer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/02_Setup/cuda-installer.sh -------------------------------------------------------------------------------- /03_C_and_C++_Review/.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/.vscode/settings.json -------------------------------------------------------------------------------- /03_C_and_C++_Review/01 Pointers/01.c: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/01 Pointers/01.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/01 Pointers/02.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/01 Pointers/02.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/01 Pointers/03.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/01 Pointers/03.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/01 Pointers/04.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/01 Pointers/04.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/01 Pointers/05.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/01 Pointers/05.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/01 Pointers/06.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/01 Pointers/06.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/02 Custom Types/01.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/02 Custom Types/01.c -------------------------------------------------------------------------------- 
/03_C_and_C++_Review/02 Custom Types/02.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/02 Custom Types/02.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/02 Custom Types/02.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/02 Custom Types/02.cpp -------------------------------------------------------------------------------- /03_C_and_C++_Review/03 Type Casting/01.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/03 Type Casting/01.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/03 Type Casting/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/03 Type Casting/README.md -------------------------------------------------------------------------------- /03_C_and_C++_Review/04 Macros & Global Vars/01.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/04 Macros & Global Vars/01.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/05 Compilers/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/05 Compilers/README.md -------------------------------------------------------------------------------- /03_C_and_C++_Review/06 Makefiles/01.c: -------------------------------------------------------------------------------- 1 
| #include <stdio.h> 2 | 3 | int main() { 4 | printf("boo!\n"); 5 | } 6 | -------------------------------------------------------------------------------- /03_C_and_C++_Review/06 Makefiles/02.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | 3 | int main() { 4 | printf("boo x 2!\n"); 5 | } 6 | -------------------------------------------------------------------------------- /03_C_and_C++_Review/06 Makefiles/03.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/06 Makefiles/03.cu -------------------------------------------------------------------------------- /03_C_and_C++_Review/06 Makefiles/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/06 Makefiles/Makefile -------------------------------------------------------------------------------- /03_C_and_C++_Review/06 Makefiles/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/06 Makefiles/README.md -------------------------------------------------------------------------------- /03_C_and_C++_Review/07 Debuggers/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/07 Debuggers/README.md -------------------------------------------------------------------------------- /03_C_and_C++_Review/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/README.md -------------------------------------------------------------------------------- /03_C_and_C++_Review/optional/01.c: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/optional/01.c -------------------------------------------------------------------------------- /03_C_and_C++_Review/optional/02.c: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /03_C_and_C++_Review/optional/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/optional/README.md -------------------------------------------------------------------------------- /03_C_and_C++_Review/pointers-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/03_C_and_C++_Review/pointers-simple.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/README.md -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/cpu-vs-gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/cpu-vs-gpu.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/cpu.png -------------------------------------------------------------------------------- 
/04_Gentle_Intro_to_GPUs/assets/fpga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/fpga.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/gpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/gpu.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/history01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/history01.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/history02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/history02.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/history03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/history03.png -------------------------------------------------------------------------------- /04_Gentle_Intro_to_GPUs/assets/tpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/04_Gentle_Intro_to_GPUs/assets/tpu.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/01 CUDA Basics/01_idxing.cu: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/01 CUDA Basics/01_idxing.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/01 CUDA Basics/Basics of Basics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/01 CUDA Basics/Basics of Basics.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/01 CUDA Basics/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/01 CUDA Basics/README.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/02 Kernels/00_vector_add_v1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/02 Kernels/00_vector_add_v1.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/02 Kernels/01_vector_add_v2.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/02 Kernels/01_vector_add_v2.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/02 Kernels/02 matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/02 Kernels/02 matmul.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/02 Kernels/README.md: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/02 Kernels/README.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/03 Profiling/00 nvtx_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/03 Profiling/00 nvtx_matmul.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/03 Profiling/01_naive_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/03 Profiling/01_naive_matmul.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/03 Profiling/02_tiled_matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/03 Profiling/02_tiled_matmul.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/03 Profiling/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/03 Profiling/README.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/04 Atomics/00_atomicAdd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/04 Atomics/00_atomicAdd.cu -------------------------------------------------------------------------------- 
/05_Writing_your_First_Kernels/04 Atomics/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/04 Atomics/README.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/05 Streams/01_stream_basics.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/05 Streams/01_stream_basics.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/05 Streams/02_stream_advanced.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/05 Streams/02_stream_advanced.cu -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/05 Streams/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/05 Streams/README.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/README.md -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/SMs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/SMs.png -------------------------------------------------------------------------------- 
/05_Writing_your_First_Kernels/assets/barrier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/barrier.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/bitshift1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/bitshift1.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/cuda-square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/cuda-square.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/cudamemcpy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/cudamemcpy.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/cudasquare2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/cudasquare2.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/cudasquare3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/cudasquare3.png -------------------------------------------------------------------------------- 
/05_Writing_your_First_Kernels/assets/dimidx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/dimidx.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/exampleknls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/exampleknls.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/gpustats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/gpustats.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/idxing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/idxing.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/lockstep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/lockstep.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/membandwidth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/membandwidth.png -------------------------------------------------------------------------------- 
/05_Writing_your_First_Kernels/assets/memhierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/memhierarchy.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/memmodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/memmodel.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/nsight-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/nsight-ui.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/prof1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/prof1.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/prof2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/prof2.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/prof3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/prof3.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/schedulers.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/schedulers.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/squareknl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/squareknl.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/threadblockgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/threadblockgrid.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/triton1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/triton1.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/triton2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/triton2.png -------------------------------------------------------------------------------- /05_Writing_your_First_Kernels/assets/weft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/05_Writing_your_First_Kernels/assets/weft.png -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/01 cuBLAS/01_Hgemm_Sgemm.cu: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/01 cuBLAS/01_Hgemm_Sgemm.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/02 cuBLASLt/01_LtMatmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/02 cuBLASLt/01_LtMatmul.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/02 cuBLASLt/02_compare.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/02 cuBLASLt/02_compare.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/02 cuBLASLt/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/02 cuBLASLt/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/03 cuBLASXt/01_demo.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/03 cuBLASXt/01_demo.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/03 cuBLASXt/02_compare.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/03 cuBLASXt/02_compare.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/01 CUBLAS/README.md: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/01 CUBLAS/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/02 CUDNN/00 Tanh.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/02 CUDNN/00 Tanh.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/02 CUDNN/00 torch-compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/02 CUDNN/00 torch-compare.py -------------------------------------------------------------------------------- /06_CUDA_APIs/02 CUDNN/01 Conv2d_NCHW.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/02 CUDNN/01 Conv2d_NCHW.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/02 CUDNN/01 torch-compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/02 CUDNN/01 torch-compare.py -------------------------------------------------------------------------------- /06_CUDA_APIs/02 CUDNN/02 compare_conv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/02 CUDNN/02 compare_conv.cu -------------------------------------------------------------------------------- /06_CUDA_APIs/02 CUDNN/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/02 CUDNN/README.md -------------------------------------------------------------------------------- 
/06_CUDA_APIs/03 Larger Rigs or Datacenters/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/03 Larger Rigs or Datacenters/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/assets/blas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/assets/blas.png -------------------------------------------------------------------------------- /06_CUDA_APIs/assets/cublas-vs-cublasxt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/assets/cublas-vs-cublasxt.png -------------------------------------------------------------------------------- /06_CUDA_APIs/assets/flashattn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/assets/flashattn.png -------------------------------------------------------------------------------- /06_CUDA_APIs/assets/knlfusion1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/assets/knlfusion1.png -------------------------------------------------------------------------------- /06_CUDA_APIs/assets/knlfusion2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/assets/knlfusion2.png 
-------------------------------------------------------------------------------- /06_CUDA_APIs/optional/CUFFT/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/optional/CUFFT/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/optional/CURAND/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/optional/CURAND/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/optional/CUTLASS/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/optional/CUTLASS/README.md -------------------------------------------------------------------------------- /06_CUDA_APIs/optional/CUTLASS/compare.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/06_CUDA_APIs/optional/CUTLASS/compare.cu -------------------------------------------------------------------------------- /07_Faster_Matmul/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/README.md -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/caching.excalidraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/caching.excalidraw -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/caching.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/caching.png -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/comparison.excalidraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/comparison.excalidraw -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/comparison.png -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/kernel3_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/kernel3_1.png -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/kernel3_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/kernel3_2.png -------------------------------------------------------------------------------- /07_Faster_Matmul/assets/memoryspeeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/assets/memoryspeeds.png -------------------------------------------------------------------------------- /07_Faster_Matmul/unrolling_example.cu: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/07_Faster_Matmul/unrolling_example.cu -------------------------------------------------------------------------------- /08_Triton/01_vec_add.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/08_Triton/01_vec_add.py -------------------------------------------------------------------------------- /08_Triton/02_softmax.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/08_Triton/02_softmax.c -------------------------------------------------------------------------------- /08_Triton/02_softmax.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/08_Triton/02_softmax.cu -------------------------------------------------------------------------------- /08_Triton/02_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/08_Triton/02_softmax.py -------------------------------------------------------------------------------- /08_Triton/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/08_Triton/README.md -------------------------------------------------------------------------------- /09_PyTorch_Extensions/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/09_PyTorch_Extensions/README.md -------------------------------------------------------------------------------- /09_PyTorch_Extensions/polynomial_activation.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/09_PyTorch_Extensions/polynomial_activation.py -------------------------------------------------------------------------------- /09_PyTorch_Extensions/polynomial_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/09_PyTorch_Extensions/polynomial_cuda.cu -------------------------------------------------------------------------------- /09_PyTorch_Extensions/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/09_PyTorch_Extensions/setup.py -------------------------------------------------------------------------------- /10_Final_Project/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/10_Final_Project/README.md -------------------------------------------------------------------------------- /10_Final_Project/assets/architecture.excalidraw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/10_Final_Project/assets/architecture.excalidraw -------------------------------------------------------------------------------- /10_Final_Project/assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/10_Final_Project/assets/architecture.png -------------------------------------------------------------------------------- /10_Final_Project/assets/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/10_Final_Project/assets/mnist.png -------------------------------------------------------------------------------- 
/11_Extras/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/README.md -------------------------------------------------------------------------------- /11_Extras/assets/8t-sram-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/assets/8t-sram-cell.png -------------------------------------------------------------------------------- /11_Extras/assets/async.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/assets/async.png -------------------------------------------------------------------------------- /11_Extras/assets/cheatsheet.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/assets/cheatsheet.md -------------------------------------------------------------------------------- /11_Extras/assets/dram-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/assets/dram-cell.png -------------------------------------------------------------------------------- /11_Extras/assets/nvcc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/assets/nvcc.png -------------------------------------------------------------------------------- /11_Extras/assets/sram-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/11_Extras/assets/sram-cell.png -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Infatoshi/cuda-course/HEAD/README.md --------------------------------------------------------------------------------