├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── assets └── four-over-six.png ├── pyproject.toml ├── scripts ├── __init__.py ├── generate_kernels.py ├── ptq │ ├── __init__.py │ ├── __main__.py │ ├── evaluator.py │ ├── high_precision.py │ ├── rtn.py │ ├── utils.py │ └── worker.py └── resources.py ├── setup.py └── src └── fouroversix ├── __init__.py ├── backend.py ├── csrc ├── bindings.cpp ├── fp4_gemm.cu ├── include │ ├── element_traits.hpp │ ├── fp4_quant.h │ ├── fp4_quant_kernel.h │ ├── fp4_quant_launch_template.h │ ├── hardware_info.h │ ├── kernel_traits.h │ ├── static_switch.h │ └── utils.h └── quantize │ ├── fp4_quant.cu │ ├── fp4_quant_bf16_mxfp4_rht_sm100.cu │ ├── fp4_quant_bf16_mxfp4_rht_trans_sm100.cu │ ├── fp4_quant_bf16_mxfp4_sm100.cu │ ├── fp4_quant_bf16_mxfp4_trans_sm100.cu │ ├── fp4_quant_bf16_nvfp4_rht_sm100.cu │ ├── fp4_quant_bf16_nvfp4_rht_trans_sm100.cu │ ├── fp4_quant_bf16_nvfp4_sm100.cu │ ├── fp4_quant_bf16_nvfp4_trans_sm100.cu │ ├── fp4_quant_fp16_mxfp4_rht_sm100.cu │ ├── fp4_quant_fp16_mxfp4_rht_trans_sm100.cu │ ├── fp4_quant_fp16_mxfp4_sm100.cu │ ├── fp4_quant_fp16_mxfp4_trans_sm100.cu │ ├── fp4_quant_fp16_nvfp4_rht_sm100.cu │ ├── fp4_quant_fp16_nvfp4_rht_trans_sm100.cu │ ├── fp4_quant_fp16_nvfp4_sm100.cu │ └── fp4_quant_fp16_nvfp4_trans_sm100.cu ├── frontend.py ├── ops.py ├── ptq.py ├── quantize ├── __init__.py ├── reference.py └── triton_kernel.py └── utils.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/.github/workflows/main.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/README.md -------------------------------------------------------------------------------- /assets/four-over-six.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/assets/four-over-six.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/generate_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/generate_kernels.py -------------------------------------------------------------------------------- /scripts/ptq/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/ptq/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/ptq/__main__.py -------------------------------------------------------------------------------- /scripts/ptq/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/ptq/evaluator.py -------------------------------------------------------------------------------- /scripts/ptq/high_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/ptq/high_precision.py -------------------------------------------------------------------------------- /scripts/ptq/rtn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/ptq/rtn.py -------------------------------------------------------------------------------- /scripts/ptq/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/ptq/utils.py -------------------------------------------------------------------------------- /scripts/ptq/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/ptq/worker.py -------------------------------------------------------------------------------- /scripts/resources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/scripts/resources.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/setup.py -------------------------------------------------------------------------------- /src/fouroversix/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/__init__.py -------------------------------------------------------------------------------- /src/fouroversix/backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/backend.py -------------------------------------------------------------------------------- /src/fouroversix/csrc/bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/bindings.cpp -------------------------------------------------------------------------------- /src/fouroversix/csrc/fp4_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/fp4_gemm.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/element_traits.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/element_traits.hpp -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/fp4_quant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/fp4_quant.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/fp4_quant_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/fp4_quant_kernel.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/fp4_quant_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/fp4_quant_launch_template.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/hardware_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/hardware_info.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/kernel_traits.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/static_switch.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/include/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/include/utils.h -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_rht_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_rht_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_rht_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_rht_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_mxfp4_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_rht_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_rht_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_rht_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_rht_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_bf16_nvfp4_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_rht_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_rht_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_rht_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_rht_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_mxfp4_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_rht_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_rht_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_rht_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_rht_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_trans_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/csrc/quantize/fp4_quant_fp16_nvfp4_trans_sm100.cu -------------------------------------------------------------------------------- /src/fouroversix/frontend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/frontend.py -------------------------------------------------------------------------------- /src/fouroversix/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/ops.py -------------------------------------------------------------------------------- /src/fouroversix/ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/ptq.py -------------------------------------------------------------------------------- /src/fouroversix/quantize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fouroversix/quantize/reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/quantize/reference.py -------------------------------------------------------------------------------- /src/fouroversix/quantize/triton_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/quantize/triton_kernel.py -------------------------------------------------------------------------------- /src/fouroversix/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/fouroversix/HEAD/src/fouroversix/utils.py --------------------------------------------------------------------------------