├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake └── Modules │ └── FindNCCL.cmake ├── docs └── _static │ └── image │ ├── FlashOverlap_LOGO.png │ └── typical_timeline.jpeg ├── example ├── RMSNorm.py ├── RowParallelLinear.py ├── correctness_ar.py ├── correctness_rs.py └── utils.py ├── src ├── CMakeLists.txt ├── baseline_impl.cu ├── baseline_impl.h ├── gemm │ ├── gemm.cu │ └── gemm.h ├── inc │ ├── gemm_instances.inc │ ├── monitor_instances.inc │ ├── scatter_instances.inc │ └── signal_instances.inc ├── nccl_utils.cu ├── nccl_utils.h ├── overlap │ ├── gemm_scatter.cu │ ├── gemm_scatter.h │ ├── gemm_signal.cu │ ├── gemm_signal.h │ ├── gemm_with_epilogue_visitor.h │ ├── gemm_with_scatter.h │ └── gemm_with_signal.h ├── overlap_impl.cu ├── overlap_impl.h ├── pybind.cpp ├── rmsnorm │ ├── rmsnorm.cu │ ├── rmsnorm.cuh │ ├── rmsnorm.h │ └── utils.h ├── tiling │ ├── gemm_dispatcher.h │ ├── gemm_tiling.cuh │ ├── scatter_tiling.cuh │ └── signal_tiling.cuh └── wait.cuh ├── test └── test.py ├── tool └── generate_instances.py └── tune ├── bandwidth.py ├── gen_config.py ├── profile_config.py └── search.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/README.md -------------------------------------------------------------------------------- /cmake/Modules/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/cmake/Modules/FindNCCL.cmake -------------------------------------------------------------------------------- /docs/_static/image/FlashOverlap_LOGO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/docs/_static/image/FlashOverlap_LOGO.png -------------------------------------------------------------------------------- /docs/_static/image/typical_timeline.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/docs/_static/image/typical_timeline.jpeg -------------------------------------------------------------------------------- /example/RMSNorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/example/RMSNorm.py -------------------------------------------------------------------------------- /example/RowParallelLinear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/example/RowParallelLinear.py -------------------------------------------------------------------------------- /example/correctness_ar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/example/correctness_ar.py -------------------------------------------------------------------------------- /example/correctness_rs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/example/correctness_rs.py -------------------------------------------------------------------------------- /example/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/example/utils.py -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/CMakeLists.txt -------------------------------------------------------------------------------- /src/baseline_impl.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/baseline_impl.cu -------------------------------------------------------------------------------- /src/baseline_impl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/baseline_impl.h -------------------------------------------------------------------------------- /src/gemm/gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/gemm/gemm.cu -------------------------------------------------------------------------------- /src/gemm/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/gemm/gemm.h -------------------------------------------------------------------------------- /src/inc/gemm_instances.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/inc/gemm_instances.inc -------------------------------------------------------------------------------- /src/inc/monitor_instances.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/inc/monitor_instances.inc -------------------------------------------------------------------------------- /src/inc/scatter_instances.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/inc/scatter_instances.inc -------------------------------------------------------------------------------- /src/inc/signal_instances.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/inc/signal_instances.inc -------------------------------------------------------------------------------- /src/nccl_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/nccl_utils.cu -------------------------------------------------------------------------------- /src/nccl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/nccl_utils.h -------------------------------------------------------------------------------- /src/overlap/gemm_scatter.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_scatter.cu -------------------------------------------------------------------------------- /src/overlap/gemm_scatter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_scatter.h -------------------------------------------------------------------------------- /src/overlap/gemm_signal.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_signal.cu -------------------------------------------------------------------------------- /src/overlap/gemm_signal.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_signal.h -------------------------------------------------------------------------------- /src/overlap/gemm_with_epilogue_visitor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_with_epilogue_visitor.h -------------------------------------------------------------------------------- /src/overlap/gemm_with_scatter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_with_scatter.h -------------------------------------------------------------------------------- /src/overlap/gemm_with_signal.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap/gemm_with_signal.h -------------------------------------------------------------------------------- /src/overlap_impl.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap_impl.cu -------------------------------------------------------------------------------- /src/overlap_impl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/overlap_impl.h -------------------------------------------------------------------------------- /src/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/pybind.cpp -------------------------------------------------------------------------------- /src/rmsnorm/rmsnorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/rmsnorm/rmsnorm.cu -------------------------------------------------------------------------------- /src/rmsnorm/rmsnorm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/rmsnorm/rmsnorm.cuh -------------------------------------------------------------------------------- /src/rmsnorm/rmsnorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/rmsnorm/rmsnorm.h -------------------------------------------------------------------------------- /src/rmsnorm/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/rmsnorm/utils.h -------------------------------------------------------------------------------- /src/tiling/gemm_dispatcher.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/tiling/gemm_dispatcher.h -------------------------------------------------------------------------------- /src/tiling/gemm_tiling.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/tiling/gemm_tiling.cuh -------------------------------------------------------------------------------- /src/tiling/scatter_tiling.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/tiling/scatter_tiling.cuh -------------------------------------------------------------------------------- /src/tiling/signal_tiling.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/tiling/signal_tiling.cuh -------------------------------------------------------------------------------- /src/wait.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/src/wait.cuh -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/test/test.py -------------------------------------------------------------------------------- /tool/generate_instances.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/tool/generate_instances.py -------------------------------------------------------------------------------- /tune/bandwidth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/tune/bandwidth.py -------------------------------------------------------------------------------- /tune/gen_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/tune/gen_config.py -------------------------------------------------------------------------------- /tune/profile_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/tune/profile_config.py -------------------------------------------------------------------------------- /tune/search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infinigence/FlashOverlap/HEAD/tune/search.py --------------------------------------------------------------------------------