├── .celery ├── create-project-directory.sh ├── psql.sh ├── start-daemons.sh ├── stop-daemons.sh └── worker-service.sh ├── .ci ├── README.md ├── base.Dockerfile ├── build-debug.sh ├── build-for-torch.sh ├── build-release.sh ├── build-shim.sh ├── build-test.sh ├── build-triton-tester.sh ├── build-tune.sh ├── buildenv-triton_tester.Dockerfile ├── common-build.sh ├── common-git-https-origin.sh ├── common-setup-volume.sh ├── common-vars.sh ├── dockerscript-setup-repo.sh ├── releasesuite-git-head.sh ├── rocm.Dockerfile ├── run-ci-test.sh ├── run-test.sh ├── source.Dockerfile ├── torch-build.sh ├── triton-patch │ ├── README.md │ ├── docker-script-build.sh │ ├── docker-script-patch.sh │ ├── patch-20250521.sh │ └── patch-20250805.sh ├── triton-tester-build.sh ├── triton-tester-run.sh └── triton-wheel-build.sh ├── .clang-format ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bindings ├── .clang-format ├── CMakeLists.txt ├── hipmemory.cc ├── hipruntime.cc ├── lazy_tensor │ ├── lazy_tensor_template.h │ └── main.cc ├── module.cc └── v3.cc ├── dockerfile ├── README.md ├── Technical.md ├── build.sh ├── input │ ├── docker-script-build.sh │ ├── docker-script-triton-tester-build.sh │ ├── install.sh │ ├── install_aotriton.sh │ ├── install_triton.sh │ ├── patch-0.7.1b │ │ ├── 0001-Use-default-C-compiler.patch │ │ ├── 0002-Link-to-hip-host.patch │ │ └── 0003-Fix-error-designator-order-does-not-match-declaratio.patch │ ├── patch-0.7b │ │ ├── 0001-Use-default-C-compiler.patch │ │ ├── 0002-Link-to-hip-host.patch │ │ └── 0003-Fix-error-designator-order-does-not-match-declaratio.patch │ ├── print_hip_version.h │ └── print_rocm_version.h └── manylinux_2_28.Dockerfile ├── docs ├── AltWheelExample.yaml ├── How To Generate Tuning Database.md ├── How To Run Tests.md └── How To Update Constraints of Tuning Database.md ├── include └── aotriton │ ├── _internal │ ├── aiter_hip_common.h │ ├── flash │ │ └── aiter.h │ ├── kernel_cluster.h │ ├── lazy_tensor_internal.h │ ├── packed_kernel.h │ ├── triton_kernel.h │ └── util.h │ ├── config.h.in │ ├── cpp_tune.h │ ├── dtypes.h │ ├── flash.h │ ├── runtime.h │ ├── util.h │ └── v2 │ └── flash.h ├── requirements-dev.txt ├── requirements-tuning.txt ├── requirements.txt ├── test ├── _common_test.py ├── _core_test_backward.py ├── adiffs │ ├── gfx1100.txt │ ├── gfx1201.txt │ ├── gfx90a.txt │ ├── gfx942.txt │ └── gfx950.txt ├── aotriton_flash.py ├── attn_torch_function.py ├── bwd_preprocess.py ├── bwd_split_kernel.py ├── fwd_kernel.py ├── mapseqlen.py ├── mptune │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── aav.py │ │ ├── cpp_autotune.py │ │ ├── datatypes.py │ │ ├── db_accessor.py │ │ ├── manager.py │ │ ├── message.py │ │ ├── monad.py │ │ ├── rocm_arch.py │ │ ├── state_tracker.py │ │ └── tuner.py │ ├── flash │ │ ├── __init__.py │ │ ├── _common_test.py │ │ ├── benchmark.py │ │ ├── db_accessor.py │ │ ├── db_benchmark.py │ │ └── tuner.py │ └── tui │ │ ├── __init__.py │ │ ├── main.py │ │ ├── tunerapp.py │ │ └── tunerapp.tcss ├── optune_flash.py ├── performance_backward.py ├── performance_forward.py ├── pytest2entry.py ├── rocm_arch.py ├── test_backward.py ├── test_find_package │ └── CMakeLists.txt ├── test_forward.py ├── test_varlen.py ├── triton_attn_torch_function.py ├── triton_backward.py ├── triton_forward.py ├── triton_tester.py ├── tune_flash.py ├── v1_test_compile.cc └── varlen_attn_torch_function.py ├── tritonsrc ├── _common_backward.py ├── _common_test.py ├── attn_torch_function.py ├── bwd_inner_dk_dv.py ├── bwd_inner_dq.py ├── bwd_inner_fuse.py ├── bwd_kernel_dk_dv.py ├── bwd_kernel_dq.py ├── bwd_kernel_fuse.py ├── bwd_postprocess.py ├── bwd_preprocess.py ├── bwd_split_kernel.py ├── composed_tensors.py ├── dropout.py ├── dropout_rng.py ├── flash.py ├── fwd_kernel.py ├── fwd_kernel_inner.py ├── masked_load_store.py ├── performance_backward.py ├── performance_forward.py ├── rocm_arch.py ├── sized_tuned_bwd.py ├── test_backward.py ├── test_forward.py ├── test_minmal_backward.py ├── test_varlen.py ├── tune_flash.py ├── tuned_bwd.py └── varlen_attn_torch_function.py ├── v2python ├── __init__.py ├── aks2.py ├── autotune_binning.py ├── autotune_config.py ├── common_tuning_database.py ├── compile.py ├── conditional_value.py ├── downgrader.py ├── generate_compile.py ├── generate_shim.py ├── gpu_targets.py ├── json_tuning_database.py ├── kernel_argument.py ├── kernel_desc.py ├── kernel_signature.py ├── ld_script.py ├── object_desc.py ├── rules │ ├── __init__.py │ ├── flash │ │ ├── __init__.py │ │ ├── _common.py │ │ ├── attn_fwd.py │ │ ├── bwd_kernel_dk_dv.py │ │ ├── bwd_kernel_dq.py │ │ ├── bwd_kernel_fuse.py │ │ ├── bwd_preprocess.py │ │ ├── debug_fill_dropout_rng.py │ │ └── debug_simulate_encoded_softmax.py │ └── tuning_database.sqlite3.tar.xz ├── sqlite_tuning_database.py ├── table_tool.py ├── tuning_database.py └── tuning_lut.py ├── v2src ├── CMakeLists.txt ├── config.cmake.in ├── flash │ ├── attn_bwd.cc │ ├── attn_bwd_fused.cc │ ├── attn_check.cc │ ├── attn_debug.cc │ └── attn_fwd.cc ├── packed_kernel.cc ├── template │ ├── autotune_table_entry.cc │ ├── shim.cc │ └── shim.h ├── triton_kernel.cc └── util.cc ├── v3python ├── __init__.py ├── affine │ ├── __init__.py │ ├── akdesc.py │ ├── csv_translator.py │ └── dkarg.py ├── aks2.py ├── autotune │ ├── __init__.py │ ├── binning.py │ └── config.py ├── base │ ├── README.md │ ├── __init__.py │ ├── bind.py │ ├── cfield.py │ ├── conditional_value.py │ ├── functional.py │ ├── interface.py │ ├── parameter.py │ └── typed_choice.py ├── celery │ ├── __init__.py │ ├── celery.py │ ├── celeryconfig.py │ └── tasks.py ├── codegen │ ├── __init__.py │ ├── affine.py │ ├── affine_cap.py │ ├── autotune.py │ ├── basetune.py │ ├── common.py │ ├── interface.py │ ├── kernel.py │ ├── operator.py │ ├── optune.py │ ├── root.py │ ├── template.py │ └── template │ │ ├── affine.cc │ │ ├── affine.h │ │ ├── autotune_table_entry.cc │ │ ├── kshim_launcher.cc │ │ ├── metro_launcher.cc │ │ ├── op.cc │ │ ├── op.h │ │ ├── optune_table_entry.cc │ │ ├── shim.cc │ │ ├── shim.h │ │ └── snippet │ │ ├── metro_launch_kernel.cc │ │ ├── metro_launch_kernel_ifelse.cc │ │ ├── metro_per_kernel.cc │ │ ├── metro_per_kernel_ifelse.cc │ │ └── validator.cc ├── comment_only_asm.py ├── compile.py ├── database │ ├── __init__.py │ ├── amd │ │ ├── gfx1100 │ │ │ └── flash │ │ │ │ ├── attn_fwd.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dk_dv.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dq.sqlite3.tar.xz │ │ │ │ └── bwd_kernel_fuse.sqlite3.tar.xz │ │ ├── gfx1201 │ │ │ └── flash │ │ │ │ ├── attn_fwd.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dk_dv.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dq.sqlite3.tar.xz │ │ │ │ └── bwd_kernel_fuse.sqlite3.tar.xz │ │ ├── gfx90a │ │ │ └── flash │ │ │ │ ├── attn_fwd.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dk_dv.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dq.sqlite3.tar.xz │ │ │ │ └── bwd_kernel_fuse.sqlite3.tar.xz │ │ ├── gfx942 │ │ │ └── flash │ │ │ │ ├── attn_fwd.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dk_dv.sqlite3.tar.xz │ │ │ │ ├── bwd_kernel_dq.sqlite3.tar.xz │ │ │ │ └── bwd_kernel_fuse.sqlite3.tar.xz │ │ └── gfx950 │ │ │ └── flash │ │ │ ├── attn_fwd.sqlite3.tar.xz │ │ │ ├── bwd_kernel_dk_dv.sqlite3.tar.xz │ │ │ ├── bwd_kernel_dq.sqlite3.tar.xz │ │ │ └── bwd_kernel_fuse.sqlite3.tar.xz │ ├── factories.py │ ├── op_database.sqlite3.tar.xz │ ├── sqlite.py │ ├── tuning_database.sqlite3.tar.xz │ └── view.py ├── database_compose.py ├── database_decompose.py ├── generate.py ├── gpu_targets.py ├── kernel │ ├── __init__.py │ ├── kdesc.py │ ├── ksignature.py │ └── object_desc.py ├── op │ ├── __init__.py │ ├── conditional.py │ ├── metro.py │ └── operator.py ├── rules │ ├── .gitignore │ ├── __init__.py │ └── flash │ │ ├── __init__.py │ │ ├── _common.py │ │ ├── aiter_bwd.csv │ │ ├── aiter_bwd.py │ │ ├── attn_fwd.py │ │ ├── bwd_kernel_dk_dv.py │ │ ├── bwd_kernel_dq.py │ │ ├── bwd_kernel_fuse.py │ │ ├── bwd_postprocess.py │ │ ├── bwd_preprocess.py │ │ ├── debug_fill_dropout_rng.py │ │ ├── debug_simulate_encoded_softmax.py │ │ ├── op_attn_bwd.py │ │ ├── op_attn_fwd.py │ │ └── ops.py ├── tune │ ├── defaults.py │ ├── exaid.py │ ├── flash │ │ ├── __init__.py │ │ ├── flash.py │ │ ├── kernels.py │ │ ├── reference.py │ │ └── utils.py │ ├── gpu_utils.py │ ├── kftdesc.py │ ├── tdesc.py │ ├── test_exaid.py │ ├── testrun.py │ └── utils.py ├── utils │ ├── __init__.py │ ├── dict2json.py │ ├── lazy_file.py │ ├── log.py │ └── registry.py └── write_image_signature.py └── v3src ├── CMakeLists.txt ├── config.cmake.in ├── flash ├── aiter_bwd.cc ├── attn_bwd.cc ├── attn_bwd_fused.cc ├── attn_check.cc ├── attn_debug.cc └── attn_fwd.cc ├── packed_kernel_common.cc ├── packed_kernel_unix.h ├── packed_kernel_win32.h ├── triton_kernel.cc └── util.cc /.celery/create-project-directory.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.celery/create-project-directory.sh -------------------------------------------------------------------------------- /.celery/psql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.celery/psql.sh -------------------------------------------------------------------------------- /.celery/start-daemons.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.celery/start-daemons.sh -------------------------------------------------------------------------------- /.celery/stop-daemons.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.celery/stop-daemons.sh -------------------------------------------------------------------------------- /.celery/worker-service.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.celery/worker-service.sh -------------------------------------------------------------------------------- /.ci/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/README.md -------------------------------------------------------------------------------- /.ci/base.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/base.Dockerfile -------------------------------------------------------------------------------- /.ci/build-debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-debug.sh -------------------------------------------------------------------------------- /.ci/build-for-torch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-for-torch.sh -------------------------------------------------------------------------------- /.ci/build-release.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-release.sh -------------------------------------------------------------------------------- /.ci/build-shim.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-shim.sh -------------------------------------------------------------------------------- /.ci/build-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-test.sh -------------------------------------------------------------------------------- /.ci/build-triton-tester.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-triton-tester.sh -------------------------------------------------------------------------------- /.ci/build-tune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/build-tune.sh -------------------------------------------------------------------------------- /.ci/buildenv-triton_tester.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/buildenv-triton_tester.Dockerfile -------------------------------------------------------------------------------- /.ci/common-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/common-build.sh -------------------------------------------------------------------------------- /.ci/common-git-https-origin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/common-git-https-origin.sh -------------------------------------------------------------------------------- /.ci/common-setup-volume.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/common-setup-volume.sh -------------------------------------------------------------------------------- /.ci/common-vars.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/common-vars.sh -------------------------------------------------------------------------------- /.ci/dockerscript-setup-repo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/dockerscript-setup-repo.sh -------------------------------------------------------------------------------- /.ci/releasesuite-git-head.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/releasesuite-git-head.sh -------------------------------------------------------------------------------- /.ci/rocm.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/rocm.Dockerfile -------------------------------------------------------------------------------- /.ci/run-ci-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/run-ci-test.sh -------------------------------------------------------------------------------- /.ci/run-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/run-test.sh -------------------------------------------------------------------------------- /.ci/source.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/source.Dockerfile -------------------------------------------------------------------------------- /.ci/torch-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/torch-build.sh -------------------------------------------------------------------------------- /.ci/triton-patch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-patch/README.md -------------------------------------------------------------------------------- /.ci/triton-patch/docker-script-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-patch/docker-script-build.sh -------------------------------------------------------------------------------- /.ci/triton-patch/docker-script-patch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-patch/docker-script-patch.sh -------------------------------------------------------------------------------- /.ci/triton-patch/patch-20250521.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-patch/patch-20250521.sh -------------------------------------------------------------------------------- /.ci/triton-patch/patch-20250805.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-patch/patch-20250805.sh -------------------------------------------------------------------------------- /.ci/triton-tester-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-tester-build.sh -------------------------------------------------------------------------------- /.ci/triton-tester-run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-tester-run.sh -------------------------------------------------------------------------------- /.ci/triton-wheel-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.ci/triton-wheel-build.sh -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.clang-format -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/README.md -------------------------------------------------------------------------------- /bindings/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/.clang-format -------------------------------------------------------------------------------- /bindings/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/CMakeLists.txt -------------------------------------------------------------------------------- /bindings/hipmemory.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/hipmemory.cc -------------------------------------------------------------------------------- /bindings/hipruntime.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/hipruntime.cc -------------------------------------------------------------------------------- /bindings/lazy_tensor/lazy_tensor_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/lazy_tensor/lazy_tensor_template.h -------------------------------------------------------------------------------- /bindings/lazy_tensor/main.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/lazy_tensor/main.cc -------------------------------------------------------------------------------- /bindings/module.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/module.cc -------------------------------------------------------------------------------- /bindings/v3.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/bindings/v3.cc -------------------------------------------------------------------------------- /dockerfile/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/README.md -------------------------------------------------------------------------------- /dockerfile/Technical.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/Technical.md -------------------------------------------------------------------------------- /dockerfile/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/build.sh -------------------------------------------------------------------------------- /dockerfile/input/docker-script-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/docker-script-build.sh -------------------------------------------------------------------------------- /dockerfile/input/docker-script-triton-tester-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/docker-script-triton-tester-build.sh -------------------------------------------------------------------------------- /dockerfile/input/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/install.sh -------------------------------------------------------------------------------- /dockerfile/input/install_aotriton.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/install_aotriton.sh -------------------------------------------------------------------------------- /dockerfile/input/install_triton.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/install_triton.sh -------------------------------------------------------------------------------- /dockerfile/input/patch-0.7.1b/0001-Use-default-C-compiler.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/patch-0.7.1b/0001-Use-default-C-compiler.patch -------------------------------------------------------------------------------- /dockerfile/input/patch-0.7.1b/0002-Link-to-hip-host.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/patch-0.7.1b/0002-Link-to-hip-host.patch -------------------------------------------------------------------------------- /dockerfile/input/patch-0.7.1b/0003-Fix-error-designator-order-does-not-match-declaratio.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/patch-0.7.1b/0003-Fix-error-designator-order-does-not-match-declaratio.patch -------------------------------------------------------------------------------- /dockerfile/input/patch-0.7b/0001-Use-default-C-compiler.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/patch-0.7b/0001-Use-default-C-compiler.patch -------------------------------------------------------------------------------- /dockerfile/input/patch-0.7b/0002-Link-to-hip-host.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/patch-0.7b/0002-Link-to-hip-host.patch -------------------------------------------------------------------------------- /dockerfile/input/patch-0.7b/0003-Fix-error-designator-order-does-not-match-declaratio.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/patch-0.7b/0003-Fix-error-designator-order-does-not-match-declaratio.patch -------------------------------------------------------------------------------- /dockerfile/input/print_hip_version.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/print_hip_version.h -------------------------------------------------------------------------------- /dockerfile/input/print_rocm_version.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/input/print_rocm_version.h -------------------------------------------------------------------------------- /dockerfile/manylinux_2_28.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/dockerfile/manylinux_2_28.Dockerfile -------------------------------------------------------------------------------- /docs/AltWheelExample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/docs/AltWheelExample.yaml -------------------------------------------------------------------------------- /docs/How To Generate Tuning Database.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/docs/How To Generate Tuning Database.md -------------------------------------------------------------------------------- /docs/How To Run Tests.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/docs/How To Run Tests.md -------------------------------------------------------------------------------- /docs/How To Update Constraints of Tuning Database.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/docs/How To Update Constraints of Tuning Database.md -------------------------------------------------------------------------------- /include/aotriton/_internal/aiter_hip_common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/aiter_hip_common.h -------------------------------------------------------------------------------- /include/aotriton/_internal/flash/aiter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/flash/aiter.h -------------------------------------------------------------------------------- /include/aotriton/_internal/kernel_cluster.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/kernel_cluster.h -------------------------------------------------------------------------------- /include/aotriton/_internal/lazy_tensor_internal.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/lazy_tensor_internal.h -------------------------------------------------------------------------------- /include/aotriton/_internal/packed_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/packed_kernel.h -------------------------------------------------------------------------------- /include/aotriton/_internal/triton_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/triton_kernel.h -------------------------------------------------------------------------------- /include/aotriton/_internal/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/_internal/util.h -------------------------------------------------------------------------------- /include/aotriton/config.h.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/config.h.in -------------------------------------------------------------------------------- /include/aotriton/cpp_tune.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/cpp_tune.h -------------------------------------------------------------------------------- /include/aotriton/dtypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/dtypes.h -------------------------------------------------------------------------------- /include/aotriton/flash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/flash.h -------------------------------------------------------------------------------- /include/aotriton/runtime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/runtime.h -------------------------------------------------------------------------------- /include/aotriton/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/util.h -------------------------------------------------------------------------------- /include/aotriton/v2/flash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/include/aotriton/v2/flash.h -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-tuning.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/requirements-tuning.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/requirements.txt -------------------------------------------------------------------------------- /test/_common_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/_common_test.py -------------------------------------------------------------------------------- /test/_core_test_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/_core_test_backward.py -------------------------------------------------------------------------------- /test/adiffs/gfx1100.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/adiffs/gfx1100.txt -------------------------------------------------------------------------------- /test/adiffs/gfx1201.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/adiffs/gfx1201.txt -------------------------------------------------------------------------------- /test/adiffs/gfx90a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/adiffs/gfx90a.txt -------------------------------------------------------------------------------- /test/adiffs/gfx942.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/adiffs/gfx942.txt -------------------------------------------------------------------------------- /test/adiffs/gfx950.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/adiffs/gfx950.txt -------------------------------------------------------------------------------- /test/aotriton_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/aotriton_flash.py -------------------------------------------------------------------------------- /test/attn_torch_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/attn_torch_function.py -------------------------------------------------------------------------------- /test/bwd_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/bwd_preprocess.py -------------------------------------------------------------------------------- /test/bwd_split_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/bwd_split_kernel.py -------------------------------------------------------------------------------- /test/fwd_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/fwd_kernel.py -------------------------------------------------------------------------------- /test/mapseqlen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mapseqlen.py -------------------------------------------------------------------------------- /test/mptune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/__init__.py -------------------------------------------------------------------------------- /test/mptune/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/__init__.py -------------------------------------------------------------------------------- /test/mptune/core/aav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/aav.py -------------------------------------------------------------------------------- /test/mptune/core/cpp_autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/cpp_autotune.py -------------------------------------------------------------------------------- /test/mptune/core/datatypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/datatypes.py -------------------------------------------------------------------------------- /test/mptune/core/db_accessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/db_accessor.py -------------------------------------------------------------------------------- /test/mptune/core/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/manager.py -------------------------------------------------------------------------------- /test/mptune/core/message.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/message.py -------------------------------------------------------------------------------- /test/mptune/core/monad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/monad.py -------------------------------------------------------------------------------- /test/mptune/core/rocm_arch.py: -------------------------------------------------------------------------------- 1 | ../../rocm_arch.py -------------------------------------------------------------------------------- /test/mptune/core/state_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/state_tracker.py -------------------------------------------------------------------------------- /test/mptune/core/tuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/core/tuner.py -------------------------------------------------------------------------------- /test/mptune/flash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/flash/__init__.py -------------------------------------------------------------------------------- /test/mptune/flash/_common_test.py: -------------------------------------------------------------------------------- 1 | ../../_common_test.py -------------------------------------------------------------------------------- /test/mptune/flash/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/flash/benchmark.py -------------------------------------------------------------------------------- /test/mptune/flash/db_accessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/flash/db_accessor.py -------------------------------------------------------------------------------- /test/mptune/flash/db_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/flash/db_benchmark.py -------------------------------------------------------------------------------- /test/mptune/flash/tuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/flash/tuner.py -------------------------------------------------------------------------------- /test/mptune/tui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/tui/__init__.py -------------------------------------------------------------------------------- /test/mptune/tui/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/tui/main.py -------------------------------------------------------------------------------- /test/mptune/tui/tunerapp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/tui/tunerapp.py -------------------------------------------------------------------------------- /test/mptune/tui/tunerapp.tcss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/mptune/tui/tunerapp.tcss -------------------------------------------------------------------------------- /test/optune_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/optune_flash.py -------------------------------------------------------------------------------- /test/performance_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/performance_backward.py -------------------------------------------------------------------------------- /test/performance_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/performance_forward.py -------------------------------------------------------------------------------- /test/pytest2entry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/pytest2entry.py -------------------------------------------------------------------------------- /test/rocm_arch.py: -------------------------------------------------------------------------------- 1 | ../tritonsrc/rocm_arch.py -------------------------------------------------------------------------------- /test/test_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/test_backward.py -------------------------------------------------------------------------------- /test/test_find_package/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/test_find_package/CMakeLists.txt -------------------------------------------------------------------------------- /test/test_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/test_forward.py -------------------------------------------------------------------------------- /test/test_varlen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/test_varlen.py -------------------------------------------------------------------------------- /test/triton_attn_torch_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/triton_attn_torch_function.py -------------------------------------------------------------------------------- /test/triton_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/triton_backward.py -------------------------------------------------------------------------------- /test/triton_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/triton_forward.py -------------------------------------------------------------------------------- /test/triton_tester.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/triton_tester.py -------------------------------------------------------------------------------- /test/tune_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/tune_flash.py -------------------------------------------------------------------------------- /test/v1_test_compile.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/v1_test_compile.cc -------------------------------------------------------------------------------- /test/varlen_attn_torch_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/test/varlen_attn_torch_function.py -------------------------------------------------------------------------------- /tritonsrc/_common_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/_common_backward.py -------------------------------------------------------------------------------- /tritonsrc/_common_test.py: -------------------------------------------------------------------------------- 1 | ../test/_common_test.py -------------------------------------------------------------------------------- /tritonsrc/attn_torch_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/attn_torch_function.py -------------------------------------------------------------------------------- /tritonsrc/bwd_inner_dk_dv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_inner_dk_dv.py -------------------------------------------------------------------------------- /tritonsrc/bwd_inner_dq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_inner_dq.py -------------------------------------------------------------------------------- /tritonsrc/bwd_inner_fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_inner_fuse.py -------------------------------------------------------------------------------- /tritonsrc/bwd_kernel_dk_dv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_kernel_dk_dv.py -------------------------------------------------------------------------------- /tritonsrc/bwd_kernel_dq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_kernel_dq.py -------------------------------------------------------------------------------- /tritonsrc/bwd_kernel_fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_kernel_fuse.py -------------------------------------------------------------------------------- /tritonsrc/bwd_postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_postprocess.py -------------------------------------------------------------------------------- /tritonsrc/bwd_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_preprocess.py -------------------------------------------------------------------------------- /tritonsrc/bwd_split_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/bwd_split_kernel.py -------------------------------------------------------------------------------- /tritonsrc/composed_tensors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/composed_tensors.py -------------------------------------------------------------------------------- /tritonsrc/dropout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/dropout.py -------------------------------------------------------------------------------- /tritonsrc/dropout_rng.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/dropout_rng.py -------------------------------------------------------------------------------- /tritonsrc/flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/flash.py -------------------------------------------------------------------------------- /tritonsrc/fwd_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/fwd_kernel.py -------------------------------------------------------------------------------- /tritonsrc/fwd_kernel_inner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/fwd_kernel_inner.py -------------------------------------------------------------------------------- /tritonsrc/masked_load_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/masked_load_store.py -------------------------------------------------------------------------------- /tritonsrc/performance_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/performance_backward.py -------------------------------------------------------------------------------- /tritonsrc/performance_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/performance_forward.py -------------------------------------------------------------------------------- /tritonsrc/rocm_arch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/rocm_arch.py -------------------------------------------------------------------------------- /tritonsrc/sized_tuned_bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/sized_tuned_bwd.py -------------------------------------------------------------------------------- /tritonsrc/test_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/test_backward.py -------------------------------------------------------------------------------- /tritonsrc/test_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/test_forward.py -------------------------------------------------------------------------------- /tritonsrc/test_minmal_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/test_minmal_backward.py -------------------------------------------------------------------------------- /tritonsrc/test_varlen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/test_varlen.py -------------------------------------------------------------------------------- /tritonsrc/tune_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/tune_flash.py -------------------------------------------------------------------------------- /tritonsrc/tuned_bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/tuned_bwd.py -------------------------------------------------------------------------------- /tritonsrc/varlen_attn_torch_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/tritonsrc/varlen_attn_torch_function.py -------------------------------------------------------------------------------- /v2python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2python/aks2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/aks2.py -------------------------------------------------------------------------------- /v2python/autotune_binning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/autotune_binning.py -------------------------------------------------------------------------------- /v2python/autotune_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/autotune_config.py -------------------------------------------------------------------------------- /v2python/common_tuning_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/common_tuning_database.py -------------------------------------------------------------------------------- /v2python/compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/compile.py -------------------------------------------------------------------------------- /v2python/conditional_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/conditional_value.py -------------------------------------------------------------------------------- /v2python/downgrader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/downgrader.py -------------------------------------------------------------------------------- /v2python/generate_compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/generate_compile.py -------------------------------------------------------------------------------- /v2python/generate_shim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/generate_shim.py -------------------------------------------------------------------------------- /v2python/gpu_targets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/gpu_targets.py -------------------------------------------------------------------------------- /v2python/json_tuning_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/json_tuning_database.py -------------------------------------------------------------------------------- /v2python/kernel_argument.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/kernel_argument.py -------------------------------------------------------------------------------- /v2python/kernel_desc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/kernel_desc.py -------------------------------------------------------------------------------- /v2python/kernel_signature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/kernel_signature.py -------------------------------------------------------------------------------- /v2python/ld_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/ld_script.py -------------------------------------------------------------------------------- /v2python/object_desc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/object_desc.py -------------------------------------------------------------------------------- /v2python/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/__init__.py -------------------------------------------------------------------------------- /v2python/rules/flash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/__init__.py -------------------------------------------------------------------------------- /v2python/rules/flash/_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/_common.py -------------------------------------------------------------------------------- /v2python/rules/flash/attn_fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/attn_fwd.py -------------------------------------------------------------------------------- /v2python/rules/flash/bwd_kernel_dk_dv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/bwd_kernel_dk_dv.py -------------------------------------------------------------------------------- /v2python/rules/flash/bwd_kernel_dq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/bwd_kernel_dq.py -------------------------------------------------------------------------------- /v2python/rules/flash/bwd_kernel_fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/bwd_kernel_fuse.py -------------------------------------------------------------------------------- /v2python/rules/flash/bwd_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/bwd_preprocess.py -------------------------------------------------------------------------------- /v2python/rules/flash/debug_fill_dropout_rng.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/debug_fill_dropout_rng.py -------------------------------------------------------------------------------- /v2python/rules/flash/debug_simulate_encoded_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/flash/debug_simulate_encoded_softmax.py -------------------------------------------------------------------------------- /v2python/rules/tuning_database.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/rules/tuning_database.sqlite3.tar.xz -------------------------------------------------------------------------------- /v2python/sqlite_tuning_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/sqlite_tuning_database.py -------------------------------------------------------------------------------- /v2python/table_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/table_tool.py -------------------------------------------------------------------------------- /v2python/tuning_database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/tuning_database.py -------------------------------------------------------------------------------- /v2python/tuning_lut.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2python/tuning_lut.py -------------------------------------------------------------------------------- /v2src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/CMakeLists.txt -------------------------------------------------------------------------------- /v2src/config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | include ( "${CMAKE_CURRENT_LIST_DIR}/aotriton-targets.cmake" ) 3 | -------------------------------------------------------------------------------- /v2src/flash/attn_bwd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/flash/attn_bwd.cc -------------------------------------------------------------------------------- /v2src/flash/attn_bwd_fused.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/flash/attn_bwd_fused.cc -------------------------------------------------------------------------------- /v2src/flash/attn_check.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/flash/attn_check.cc -------------------------------------------------------------------------------- /v2src/flash/attn_debug.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/flash/attn_debug.cc -------------------------------------------------------------------------------- /v2src/flash/attn_fwd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/flash/attn_fwd.cc -------------------------------------------------------------------------------- /v2src/packed_kernel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/packed_kernel.cc -------------------------------------------------------------------------------- /v2src/template/autotune_table_entry.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/template/autotune_table_entry.cc -------------------------------------------------------------------------------- /v2src/template/shim.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/template/shim.cc -------------------------------------------------------------------------------- /v2src/template/shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/template/shim.h -------------------------------------------------------------------------------- /v2src/triton_kernel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/triton_kernel.cc -------------------------------------------------------------------------------- /v2src/util.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v2src/util.cc -------------------------------------------------------------------------------- /v3python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v3python/affine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/affine/__init__.py -------------------------------------------------------------------------------- /v3python/affine/akdesc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/affine/akdesc.py -------------------------------------------------------------------------------- /v3python/affine/csv_translator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/affine/csv_translator.py -------------------------------------------------------------------------------- /v3python/affine/dkarg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/affine/dkarg.py -------------------------------------------------------------------------------- /v3python/aks2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/aks2.py -------------------------------------------------------------------------------- /v3python/autotune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/autotune/__init__.py -------------------------------------------------------------------------------- /v3python/autotune/binning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/autotune/binning.py -------------------------------------------------------------------------------- /v3python/autotune/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/autotune/config.py -------------------------------------------------------------------------------- /v3python/base/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/README.md -------------------------------------------------------------------------------- /v3python/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/__init__.py -------------------------------------------------------------------------------- /v3python/base/bind.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/bind.py -------------------------------------------------------------------------------- /v3python/base/cfield.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/cfield.py -------------------------------------------------------------------------------- /v3python/base/conditional_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/conditional_value.py -------------------------------------------------------------------------------- /v3python/base/functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/functional.py -------------------------------------------------------------------------------- /v3python/base/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/interface.py -------------------------------------------------------------------------------- /v3python/base/parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/parameter.py -------------------------------------------------------------------------------- /v3python/base/typed_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/base/typed_choice.py -------------------------------------------------------------------------------- /v3python/celery/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v3python/celery/celery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/celery/celery.py -------------------------------------------------------------------------------- /v3python/celery/celeryconfig.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/celery/celeryconfig.py -------------------------------------------------------------------------------- /v3python/celery/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/celery/tasks.py -------------------------------------------------------------------------------- /v3python/codegen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/__init__.py -------------------------------------------------------------------------------- /v3python/codegen/affine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/affine.py -------------------------------------------------------------------------------- /v3python/codegen/affine_cap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/affine_cap.py -------------------------------------------------------------------------------- /v3python/codegen/autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/autotune.py -------------------------------------------------------------------------------- /v3python/codegen/basetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/basetune.py -------------------------------------------------------------------------------- /v3python/codegen/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/common.py -------------------------------------------------------------------------------- /v3python/codegen/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/interface.py -------------------------------------------------------------------------------- /v3python/codegen/kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/kernel.py -------------------------------------------------------------------------------- /v3python/codegen/operator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/operator.py -------------------------------------------------------------------------------- /v3python/codegen/optune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/optune.py -------------------------------------------------------------------------------- /v3python/codegen/root.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/root.py -------------------------------------------------------------------------------- /v3python/codegen/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template.py -------------------------------------------------------------------------------- /v3python/codegen/template/affine.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/affine.cc -------------------------------------------------------------------------------- /v3python/codegen/template/affine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/affine.h -------------------------------------------------------------------------------- /v3python/codegen/template/autotune_table_entry.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/autotune_table_entry.cc -------------------------------------------------------------------------------- /v3python/codegen/template/kshim_launcher.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/kshim_launcher.cc -------------------------------------------------------------------------------- /v3python/codegen/template/metro_launcher.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/metro_launcher.cc -------------------------------------------------------------------------------- /v3python/codegen/template/op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/op.cc -------------------------------------------------------------------------------- /v3python/codegen/template/op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/op.h -------------------------------------------------------------------------------- /v3python/codegen/template/optune_table_entry.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/optune_table_entry.cc -------------------------------------------------------------------------------- /v3python/codegen/template/shim.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/shim.cc -------------------------------------------------------------------------------- /v3python/codegen/template/shim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/shim.h -------------------------------------------------------------------------------- /v3python/codegen/template/snippet/metro_launch_kernel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/snippet/metro_launch_kernel.cc -------------------------------------------------------------------------------- /v3python/codegen/template/snippet/metro_launch_kernel_ifelse.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/snippet/metro_launch_kernel_ifelse.cc -------------------------------------------------------------------------------- /v3python/codegen/template/snippet/metro_per_kernel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/snippet/metro_per_kernel.cc -------------------------------------------------------------------------------- /v3python/codegen/template/snippet/metro_per_kernel_ifelse.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/snippet/metro_per_kernel_ifelse.cc -------------------------------------------------------------------------------- /v3python/codegen/template/snippet/validator.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/codegen/template/snippet/validator.cc -------------------------------------------------------------------------------- /v3python/comment_only_asm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/comment_only_asm.py -------------------------------------------------------------------------------- /v3python/compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/compile.py -------------------------------------------------------------------------------- /v3python/database/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/__init__.py -------------------------------------------------------------------------------- /v3python/database/amd/gfx1100/flash/attn_fwd.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1100/flash/attn_fwd.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1100/flash/bwd_kernel_dk_dv.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1100/flash/bwd_kernel_dk_dv.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1100/flash/bwd_kernel_dq.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1100/flash/bwd_kernel_dq.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1100/flash/bwd_kernel_fuse.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1100/flash/bwd_kernel_fuse.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1201/flash/attn_fwd.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1201/flash/attn_fwd.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1201/flash/bwd_kernel_dk_dv.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1201/flash/bwd_kernel_dk_dv.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1201/flash/bwd_kernel_dq.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1201/flash/bwd_kernel_dq.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx1201/flash/bwd_kernel_fuse.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx1201/flash/bwd_kernel_fuse.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx90a/flash/attn_fwd.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx90a/flash/attn_fwd.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx90a/flash/bwd_kernel_dk_dv.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx90a/flash/bwd_kernel_dk_dv.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx90a/flash/bwd_kernel_dq.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx90a/flash/bwd_kernel_dq.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx90a/flash/bwd_kernel_fuse.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx90a/flash/bwd_kernel_fuse.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx942/flash/attn_fwd.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx942/flash/attn_fwd.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx942/flash/bwd_kernel_dk_dv.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx942/flash/bwd_kernel_dk_dv.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx942/flash/bwd_kernel_dq.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx942/flash/bwd_kernel_dq.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx942/flash/bwd_kernel_fuse.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx942/flash/bwd_kernel_fuse.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx950/flash/attn_fwd.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx950/flash/attn_fwd.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx950/flash/bwd_kernel_dk_dv.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx950/flash/bwd_kernel_dk_dv.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx950/flash/bwd_kernel_dq.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx950/flash/bwd_kernel_dq.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/amd/gfx950/flash/bwd_kernel_fuse.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/amd/gfx950/flash/bwd_kernel_fuse.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/factories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/factories.py -------------------------------------------------------------------------------- /v3python/database/op_database.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/op_database.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/sqlite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/sqlite.py -------------------------------------------------------------------------------- /v3python/database/tuning_database.sqlite3.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/tuning_database.sqlite3.tar.xz -------------------------------------------------------------------------------- /v3python/database/view.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database/view.py -------------------------------------------------------------------------------- /v3python/database_compose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database_compose.py -------------------------------------------------------------------------------- /v3python/database_decompose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/database_decompose.py -------------------------------------------------------------------------------- /v3python/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/generate.py -------------------------------------------------------------------------------- /v3python/gpu_targets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/gpu_targets.py -------------------------------------------------------------------------------- /v3python/kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/kernel/__init__.py -------------------------------------------------------------------------------- /v3python/kernel/kdesc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/kernel/kdesc.py -------------------------------------------------------------------------------- /v3python/kernel/ksignature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/kernel/ksignature.py -------------------------------------------------------------------------------- /v3python/kernel/object_desc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/kernel/object_desc.py -------------------------------------------------------------------------------- /v3python/op/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/op/__init__.py -------------------------------------------------------------------------------- /v3python/op/conditional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/op/conditional.py -------------------------------------------------------------------------------- /v3python/op/metro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/op/metro.py -------------------------------------------------------------------------------- /v3python/op/operator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/op/operator.py -------------------------------------------------------------------------------- /v3python/rules/.gitignore: -------------------------------------------------------------------------------- 1 | !*.csv 2 | -------------------------------------------------------------------------------- /v3python/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/__init__.py -------------------------------------------------------------------------------- /v3python/rules/flash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/__init__.py -------------------------------------------------------------------------------- /v3python/rules/flash/_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/_common.py -------------------------------------------------------------------------------- /v3python/rules/flash/aiter_bwd.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/aiter_bwd.csv -------------------------------------------------------------------------------- /v3python/rules/flash/aiter_bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/aiter_bwd.py -------------------------------------------------------------------------------- /v3python/rules/flash/attn_fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/attn_fwd.py -------------------------------------------------------------------------------- /v3python/rules/flash/bwd_kernel_dk_dv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/bwd_kernel_dk_dv.py -------------------------------------------------------------------------------- /v3python/rules/flash/bwd_kernel_dq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/bwd_kernel_dq.py -------------------------------------------------------------------------------- /v3python/rules/flash/bwd_kernel_fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/bwd_kernel_fuse.py -------------------------------------------------------------------------------- /v3python/rules/flash/bwd_postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/bwd_postprocess.py -------------------------------------------------------------------------------- /v3python/rules/flash/bwd_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/bwd_preprocess.py -------------------------------------------------------------------------------- /v3python/rules/flash/debug_fill_dropout_rng.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/debug_fill_dropout_rng.py -------------------------------------------------------------------------------- /v3python/rules/flash/debug_simulate_encoded_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/debug_simulate_encoded_softmax.py -------------------------------------------------------------------------------- /v3python/rules/flash/op_attn_bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/op_attn_bwd.py -------------------------------------------------------------------------------- /v3python/rules/flash/op_attn_fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/op_attn_fwd.py -------------------------------------------------------------------------------- /v3python/rules/flash/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/rules/flash/ops.py -------------------------------------------------------------------------------- /v3python/tune/defaults.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/defaults.py -------------------------------------------------------------------------------- /v3python/tune/exaid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/exaid.py -------------------------------------------------------------------------------- /v3python/tune/flash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/flash/__init__.py -------------------------------------------------------------------------------- /v3python/tune/flash/flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/flash/flash.py -------------------------------------------------------------------------------- /v3python/tune/flash/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/flash/kernels.py -------------------------------------------------------------------------------- /v3python/tune/flash/reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/flash/reference.py -------------------------------------------------------------------------------- /v3python/tune/flash/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/flash/utils.py -------------------------------------------------------------------------------- /v3python/tune/gpu_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/gpu_utils.py -------------------------------------------------------------------------------- /v3python/tune/kftdesc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/kftdesc.py -------------------------------------------------------------------------------- /v3python/tune/tdesc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/tdesc.py -------------------------------------------------------------------------------- /v3python/tune/test_exaid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/test_exaid.py -------------------------------------------------------------------------------- /v3python/tune/testrun.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/testrun.py -------------------------------------------------------------------------------- /v3python/tune/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/tune/utils.py -------------------------------------------------------------------------------- /v3python/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/utils/__init__.py -------------------------------------------------------------------------------- /v3python/utils/dict2json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/utils/dict2json.py -------------------------------------------------------------------------------- /v3python/utils/lazy_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/utils/lazy_file.py -------------------------------------------------------------------------------- /v3python/utils/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/utils/log.py -------------------------------------------------------------------------------- /v3python/utils/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/utils/registry.py -------------------------------------------------------------------------------- /v3python/write_image_signature.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3python/write_image_signature.py -------------------------------------------------------------------------------- /v3src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/CMakeLists.txt -------------------------------------------------------------------------------- /v3src/config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | include ( "${CMAKE_CURRENT_LIST_DIR}/aotriton-targets.cmake" ) 3 | -------------------------------------------------------------------------------- /v3src/flash/aiter_bwd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/flash/aiter_bwd.cc -------------------------------------------------------------------------------- /v3src/flash/attn_bwd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/flash/attn_bwd.cc -------------------------------------------------------------------------------- /v3src/flash/attn_bwd_fused.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/flash/attn_bwd_fused.cc -------------------------------------------------------------------------------- /v3src/flash/attn_check.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/flash/attn_check.cc -------------------------------------------------------------------------------- /v3src/flash/attn_debug.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/flash/attn_debug.cc -------------------------------------------------------------------------------- /v3src/flash/attn_fwd.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/flash/attn_fwd.cc -------------------------------------------------------------------------------- /v3src/packed_kernel_common.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/packed_kernel_common.cc -------------------------------------------------------------------------------- /v3src/packed_kernel_unix.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/packed_kernel_unix.h -------------------------------------------------------------------------------- /v3src/packed_kernel_win32.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/packed_kernel_win32.h -------------------------------------------------------------------------------- /v3src/triton_kernel.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/triton_kernel.cc -------------------------------------------------------------------------------- /v3src/util.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/aotriton/HEAD/v3src/util.cc --------------------------------------------------------------------------------