├── .github └── workflows │ ├── claude-code-review.yml │ ├── claude.yml │ ├── pre-commit.yml │ ├── publish-to-pypi.yml │ └── python-app.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── benchmarks ├── configs │ └── base_fa4.yaml ├── fav4.py ├── flash.py ├── fp8_dynamic_cast.py ├── fp8_matmul.py ├── fp8_sat_cast.py ├── llama.py ├── profile_fp8_matmul.py ├── qlora.py └── qlora_memory_trace.py ├── pyproject.toml ├── scripts ├── bisect_nightly.py └── iterative_softmax_precision.py ├── test ├── test_cute.py ├── test_flash.py ├── test_fp8.py ├── test_mx.py ├── test_numerics.py ├── test_qlora.py └── test_utils.py └── transformer_nuggets ├── __init__.py ├── cute ├── __init__.py ├── base.py ├── cache.py ├── dynamic_args.py ├── element_wise.py ├── pipeline_elementwise.py ├── profiler │ ├── README.md │ ├── __init__.py │ ├── example.py │ ├── host.py │ └── ops.py ├── to_blocked.py ├── utils.py └── warps.py ├── flash ├── __init__.py ├── flash_attention.py └── masks.py ├── flex └── __init__.py ├── fp8 ├── __init__.py ├── fp8_matmul.py └── scaled_quant.py ├── helion ├── __init__.py ├── elementwise.py └── utils.py ├── llama ├── README.md ├── __init__.py ├── finetune.py ├── model.py ├── prepare_data.py ├── tokenizer.py └── train.py ├── misc ├── __init__.py ├── mlp.py └── swizzler.py ├── moe ├── __init__.py └── unrolled.py ├── mx ├── __init__.py └── to_blocked.py ├── numerics └── __init__.py ├── quant ├── __init__.py ├── dequant_kernel.py ├── nf4_tensor.py ├── qlora.py └── qlora_debug.py ├── subclass.py └── utils ├── __init__.py ├── benchmark.py ├── linear_extraction.py ├── model_extraction.py ├── shape_trace.py ├── tracing.py └── triton.py /.github/workflows/claude-code-review.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.github/workflows/claude-code-review.yml -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.github/workflows/claude.yml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.github/workflows/publish-to-pypi.yml -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.github/workflows/python-app.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/configs/base_fa4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/configs/base_fa4.yaml -------------------------------------------------------------------------------- /benchmarks/fav4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/fav4.py -------------------------------------------------------------------------------- /benchmarks/flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/flash.py -------------------------------------------------------------------------------- /benchmarks/fp8_dynamic_cast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/fp8_dynamic_cast.py -------------------------------------------------------------------------------- /benchmarks/fp8_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/fp8_matmul.py -------------------------------------------------------------------------------- /benchmarks/fp8_sat_cast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/fp8_sat_cast.py -------------------------------------------------------------------------------- /benchmarks/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/llama.py -------------------------------------------------------------------------------- /benchmarks/profile_fp8_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/profile_fp8_matmul.py -------------------------------------------------------------------------------- /benchmarks/qlora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/qlora.py -------------------------------------------------------------------------------- /benchmarks/qlora_memory_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/benchmarks/qlora_memory_trace.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/bisect_nightly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/scripts/bisect_nightly.py -------------------------------------------------------------------------------- /scripts/iterative_softmax_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/scripts/iterative_softmax_precision.py -------------------------------------------------------------------------------- /test/test_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_cute.py -------------------------------------------------------------------------------- /test/test_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_flash.py -------------------------------------------------------------------------------- /test/test_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_fp8.py -------------------------------------------------------------------------------- /test/test_mx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_mx.py -------------------------------------------------------------------------------- /test/test_numerics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_numerics.py -------------------------------------------------------------------------------- /test/test_qlora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_qlora.py -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/test/test_utils.py -------------------------------------------------------------------------------- /transformer_nuggets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/base.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/cache.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/dynamic_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/dynamic_args.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/element_wise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/element_wise.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/pipeline_elementwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/pipeline_elementwise.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/profiler/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/profiler/README.md -------------------------------------------------------------------------------- /transformer_nuggets/cute/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/profiler/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/profiler/example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/profiler/example.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/profiler/host.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/profiler/host.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/profiler/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/profiler/ops.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/to_blocked.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/to_blocked.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/utils.py -------------------------------------------------------------------------------- /transformer_nuggets/cute/warps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/cute/warps.py -------------------------------------------------------------------------------- /transformer_nuggets/flash/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/flash/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/flash/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/flash/flash_attention.py -------------------------------------------------------------------------------- /transformer_nuggets/flash/masks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/flash/masks.py -------------------------------------------------------------------------------- /transformer_nuggets/flex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/flex/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/fp8/__init__.py: -------------------------------------------------------------------------------- 1 | from transformer_nuggets.fp8 import scaled_quant 2 | -------------------------------------------------------------------------------- /transformer_nuggets/fp8/fp8_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/fp8/fp8_matmul.py -------------------------------------------------------------------------------- /transformer_nuggets/fp8/scaled_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/fp8/scaled_quant.py -------------------------------------------------------------------------------- /transformer_nuggets/helion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/helion/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/helion/elementwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/helion/elementwise.py -------------------------------------------------------------------------------- /transformer_nuggets/helion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/helion/utils.py -------------------------------------------------------------------------------- /transformer_nuggets/llama/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/llama/README.md -------------------------------------------------------------------------------- /transformer_nuggets/llama/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transformer_nuggets/llama/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/llama/finetune.py -------------------------------------------------------------------------------- /transformer_nuggets/llama/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/llama/model.py -------------------------------------------------------------------------------- /transformer_nuggets/llama/prepare_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/llama/prepare_data.py -------------------------------------------------------------------------------- /transformer_nuggets/llama/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/llama/tokenizer.py -------------------------------------------------------------------------------- /transformer_nuggets/llama/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/llama/train.py -------------------------------------------------------------------------------- /transformer_nuggets/misc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transformer_nuggets/misc/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/misc/mlp.py -------------------------------------------------------------------------------- /transformer_nuggets/misc/swizzler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/misc/swizzler.py -------------------------------------------------------------------------------- /transformer_nuggets/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /transformer_nuggets/moe/unrolled.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/moe/unrolled.py -------------------------------------------------------------------------------- /transformer_nuggets/mx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/mx/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/mx/to_blocked.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/mx/to_blocked.py -------------------------------------------------------------------------------- /transformer_nuggets/numerics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/numerics/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/quant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/quant/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/quant/dequant_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/quant/dequant_kernel.py -------------------------------------------------------------------------------- /transformer_nuggets/quant/nf4_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/quant/nf4_tensor.py -------------------------------------------------------------------------------- /transformer_nuggets/quant/qlora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/quant/qlora.py -------------------------------------------------------------------------------- /transformer_nuggets/quant/qlora_debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/quant/qlora_debug.py -------------------------------------------------------------------------------- /transformer_nuggets/subclass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/subclass.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/__init__.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/benchmark.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/linear_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/linear_extraction.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/model_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/model_extraction.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/shape_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/shape_trace.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/tracing.py -------------------------------------------------------------------------------- /transformer_nuggets/utils/triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drisspg/transformer_nuggets/HEAD/transformer_nuggets/utils/triton.py --------------------------------------------------------------------------------