├── README.md ├── datautils_block.py ├── datautils_e2e.py ├── deita_dataset ├── __init__.py ├── constants.py ├── conversation.py └── train.py ├── examples ├── block_ap │ ├── Llama-2-7b │ │ ├── w2g128.sh │ │ ├── w2g64.sh │ │ ├── w3g128.sh │ │ └── w4g128.sh │ └── Mistral-Large-Instruct │ │ └── w2g64.sh ├── e2e_qp │ ├── Llama-2-7b │ │ ├── w2g128-alpaca.sh │ │ ├── w2g128-redpajama.sh │ │ ├── w2g64-alpaca.sh │ │ ├── w2g64-redpajama.sh │ │ ├── w3g128-alpaca.sh │ │ ├── w3g128-redpajama.sh │ │ ├── w4g128-alpaca.sh │ │ └── w4g128-redpajama.sh │ └── Llama-3-8b-instruct │ │ ├── w2g128-deita.sh │ │ ├── w2g64-deita.sh │ │ ├── w3g128-deita.sh │ │ └── w4g128-deita.sh ├── inference │ └── Llama-2-7b │ │ ├── fp16.sh │ │ └── w2g64.sh └── model_transfer │ ├── efficientqat_to_bitblas │ └── llama-2-7b.sh │ ├── efficientqat_to_gptq │ └── llama-2-7b.sh │ ├── fp32_to_16 │ └── llama-2-7b.sh │ └── real_to_fake │ └── llama-2-7b.sh ├── main_block_ap.py ├── main_e2e_qp.py ├── model_transfer ├── __init__.py ├── efficientqat_to_others.py ├── fp32_to_16.py └── real_to_fake.py ├── quantize ├── __init__.py ├── block_ap.py ├── int_linear_fake.py ├── int_linear_real.py ├── quantizer.py ├── triton_utils │ ├── __init__.py │ ├── custom_autotune.py │ ├── kernels.py │ └── mixin.py └── utils.py ├── requirements.txt └── utils.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/README.md -------------------------------------------------------------------------------- /datautils_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/datautils_block.py -------------------------------------------------------------------------------- /datautils_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/datautils_e2e.py -------------------------------------------------------------------------------- /deita_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.19" 2 | -------------------------------------------------------------------------------- /deita_dataset/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/deita_dataset/constants.py -------------------------------------------------------------------------------- /deita_dataset/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/deita_dataset/conversation.py -------------------------------------------------------------------------------- /deita_dataset/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/deita_dataset/train.py -------------------------------------------------------------------------------- /examples/block_ap/Llama-2-7b/w2g128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/block_ap/Llama-2-7b/w2g128.sh -------------------------------------------------------------------------------- /examples/block_ap/Llama-2-7b/w2g64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/block_ap/Llama-2-7b/w2g64.sh -------------------------------------------------------------------------------- /examples/block_ap/Llama-2-7b/w3g128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/block_ap/Llama-2-7b/w3g128.sh -------------------------------------------------------------------------------- /examples/block_ap/Llama-2-7b/w4g128.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/block_ap/Llama-2-7b/w4g128.sh -------------------------------------------------------------------------------- /examples/block_ap/Mistral-Large-Instruct/w2g64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/block_ap/Mistral-Large-Instruct/w2g64.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w2g128-alpaca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w2g128-alpaca.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w2g128-redpajama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w2g128-redpajama.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w2g64-alpaca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w2g64-alpaca.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w2g64-redpajama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w2g64-redpajama.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w3g128-alpaca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w3g128-alpaca.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w3g128-redpajama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w3g128-redpajama.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w4g128-alpaca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w4g128-alpaca.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-2-7b/w4g128-redpajama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-2-7b/w4g128-redpajama.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-3-8b-instruct/w2g128-deita.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-3-8b-instruct/w2g128-deita.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-3-8b-instruct/w2g64-deita.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-3-8b-instruct/w2g64-deita.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-3-8b-instruct/w3g128-deita.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-3-8b-instruct/w3g128-deita.sh -------------------------------------------------------------------------------- /examples/e2e_qp/Llama-3-8b-instruct/w4g128-deita.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/e2e_qp/Llama-3-8b-instruct/w4g128-deita.sh -------------------------------------------------------------------------------- /examples/inference/Llama-2-7b/fp16.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/inference/Llama-2-7b/fp16.sh -------------------------------------------------------------------------------- /examples/inference/Llama-2-7b/w2g64.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/inference/Llama-2-7b/w2g64.sh -------------------------------------------------------------------------------- /examples/model_transfer/efficientqat_to_bitblas/llama-2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/model_transfer/efficientqat_to_bitblas/llama-2-7b.sh -------------------------------------------------------------------------------- /examples/model_transfer/efficientqat_to_gptq/llama-2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/model_transfer/efficientqat_to_gptq/llama-2-7b.sh -------------------------------------------------------------------------------- /examples/model_transfer/fp32_to_16/llama-2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/model_transfer/fp32_to_16/llama-2-7b.sh -------------------------------------------------------------------------------- /examples/model_transfer/real_to_fake/llama-2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/examples/model_transfer/real_to_fake/llama-2-7b.sh -------------------------------------------------------------------------------- /main_block_ap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/main_block_ap.py -------------------------------------------------------------------------------- /main_e2e_qp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/main_e2e_qp.py -------------------------------------------------------------------------------- /model_transfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model_transfer/efficientqat_to_others.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/model_transfer/efficientqat_to_others.py -------------------------------------------------------------------------------- /model_transfer/fp32_to_16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/model_transfer/fp32_to_16.py -------------------------------------------------------------------------------- /model_transfer/real_to_fake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/model_transfer/real_to_fake.py -------------------------------------------------------------------------------- /quantize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /quantize/block_ap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/block_ap.py -------------------------------------------------------------------------------- /quantize/int_linear_fake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/int_linear_fake.py -------------------------------------------------------------------------------- /quantize/int_linear_real.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/int_linear_real.py -------------------------------------------------------------------------------- /quantize/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/quantizer.py -------------------------------------------------------------------------------- /quantize/triton_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /quantize/triton_utils/custom_autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/triton_utils/custom_autotune.py -------------------------------------------------------------------------------- /quantize/triton_utils/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/triton_utils/kernels.py -------------------------------------------------------------------------------- /quantize/triton_utils/mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/triton_utils/mixin.py -------------------------------------------------------------------------------- /quantize/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/quantize/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/requirements.txt -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGVLab/EfficientQAT/HEAD/utils.py --------------------------------------------------------------------------------