├── .gitignore ├── .style.yapf ├── LICENSE.txt ├── README.md ├── blocking_api.py ├── download.py ├── inference_utils.py ├── quant ├── __init__.py ├── custom_autotune.py ├── fused_attn.py ├── fused_mlp.py ├── quant_linear.py ├── quantizer.py └── triton_norm.py ├── requirements.txt ├── streaming_api.py ├── streaming_api_client_example.py └── utils ├── __init__.py ├── datautils.py ├── export.py └── modelutils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/.gitignore -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/.style.yapf -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/README.md -------------------------------------------------------------------------------- /blocking_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/blocking_api.py -------------------------------------------------------------------------------- /download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/download.py -------------------------------------------------------------------------------- /inference_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/inference_utils.py -------------------------------------------------------------------------------- /quant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/__init__.py -------------------------------------------------------------------------------- /quant/custom_autotune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/custom_autotune.py -------------------------------------------------------------------------------- /quant/fused_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/fused_attn.py -------------------------------------------------------------------------------- /quant/fused_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/fused_mlp.py -------------------------------------------------------------------------------- /quant/quant_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/quant_linear.py -------------------------------------------------------------------------------- /quant/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/quantizer.py -------------------------------------------------------------------------------- /quant/triton_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/quant/triton_norm.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/requirements.txt -------------------------------------------------------------------------------- /streaming_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/streaming_api.py -------------------------------------------------------------------------------- /streaming_api_client_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/streaming_api_client_example.py -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/utils/__init__.py -------------------------------------------------------------------------------- /utils/datautils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/utils/datautils.py -------------------------------------------------------------------------------- /utils/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/utils/export.py -------------------------------------------------------------------------------- /utils/modelutils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzbac/GPTQ-for-LLaMa-API/HEAD/utils/modelutils.py --------------------------------------------------------------------------------