├── README.md ├── doc └── assets │ ├── Llama-3.1-8B-Instruct-ppl_c4_vs_avg_bit_width.png │ ├── Llama-3.1-8B-Instruct-ppl_fineweb_edu_vs_avg_bit_width.png │ ├── Llama-3.1-8B-Instruct-ppl_wikitext2_vs_avg_bit_width.png │ └── Llama-3.1-8B-Instruct-zero_shot_task_avg_vs_avg_bit_width.png ├── eval ├── lmeval.py ├── ppleval.py └── src │ ├── common_utils.py │ ├── data_utils.py │ ├── dist_utils.py │ ├── metrics.py │ └── model_utils.py ├── evopress ├── README.md ├── evo_quant_search.py ├── scripts │ └── run_quant_search.sh └── src │ ├── __init__.py │ ├── common_utils.py │ ├── data_utils.py │ ├── dist_utils.py │ ├── error_estimator.py │ ├── fast_obc.py │ ├── fast_obq.py │ ├── linalg_utils.py │ ├── losses.py │ ├── metrics.py │ ├── model_utils.py │ ├── optim_utils.py │ ├── owl_pruner.py │ ├── prompter.py │ ├── pruner.py │ ├── quant_utils.py │ └── quantizer.py ├── mapper ├── build_ep_database.sh ├── config_converter.py ├── gguf_splitter.py └── gguf_stitcher.py └── quant ├── gguf └── run_quant.sh └── gptq ├── pack_compressed_tensors_into_gguf.py ├── pack_gptq_into_gguf.py ├── quant.py ├── run_quant.sh └── src ├── common_utils.py ├── data_utils.py ├── dist_utils.py ├── gptq.py ├── linalg_utils.py ├── metrics.py ├── model_utils.py ├── packing_utils.py ├── quant_utils.py └── quantizer.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/README.md -------------------------------------------------------------------------------- /doc/assets/Llama-3.1-8B-Instruct-ppl_c4_vs_avg_bit_width.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/doc/assets/Llama-3.1-8B-Instruct-ppl_c4_vs_avg_bit_width.png -------------------------------------------------------------------------------- /doc/assets/Llama-3.1-8B-Instruct-ppl_fineweb_edu_vs_avg_bit_width.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/doc/assets/Llama-3.1-8B-Instruct-ppl_fineweb_edu_vs_avg_bit_width.png -------------------------------------------------------------------------------- /doc/assets/Llama-3.1-8B-Instruct-ppl_wikitext2_vs_avg_bit_width.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/doc/assets/Llama-3.1-8B-Instruct-ppl_wikitext2_vs_avg_bit_width.png -------------------------------------------------------------------------------- /doc/assets/Llama-3.1-8B-Instruct-zero_shot_task_avg_vs_avg_bit_width.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/doc/assets/Llama-3.1-8B-Instruct-zero_shot_task_avg_vs_avg_bit_width.png -------------------------------------------------------------------------------- /eval/lmeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/lmeval.py -------------------------------------------------------------------------------- /eval/ppleval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/ppleval.py -------------------------------------------------------------------------------- /eval/src/common_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/src/common_utils.py -------------------------------------------------------------------------------- /eval/src/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/src/data_utils.py -------------------------------------------------------------------------------- /eval/src/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/src/dist_utils.py -------------------------------------------------------------------------------- /eval/src/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/src/metrics.py -------------------------------------------------------------------------------- /eval/src/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/eval/src/model_utils.py -------------------------------------------------------------------------------- /evopress/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/README.md -------------------------------------------------------------------------------- /evopress/evo_quant_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/evo_quant_search.py -------------------------------------------------------------------------------- /evopress/scripts/run_quant_search.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/scripts/run_quant_search.sh -------------------------------------------------------------------------------- /evopress/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evopress/src/common_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/common_utils.py -------------------------------------------------------------------------------- /evopress/src/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/data_utils.py -------------------------------------------------------------------------------- /evopress/src/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/dist_utils.py -------------------------------------------------------------------------------- /evopress/src/error_estimator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/error_estimator.py -------------------------------------------------------------------------------- /evopress/src/fast_obc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/fast_obc.py -------------------------------------------------------------------------------- /evopress/src/fast_obq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/fast_obq.py -------------------------------------------------------------------------------- /evopress/src/linalg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/linalg_utils.py -------------------------------------------------------------------------------- /evopress/src/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/losses.py -------------------------------------------------------------------------------- /evopress/src/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/metrics.py -------------------------------------------------------------------------------- /evopress/src/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/model_utils.py -------------------------------------------------------------------------------- /evopress/src/optim_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/optim_utils.py -------------------------------------------------------------------------------- /evopress/src/owl_pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/owl_pruner.py -------------------------------------------------------------------------------- /evopress/src/prompter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/prompter.py -------------------------------------------------------------------------------- /evopress/src/pruner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/pruner.py -------------------------------------------------------------------------------- /evopress/src/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/quant_utils.py -------------------------------------------------------------------------------- /evopress/src/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/evopress/src/quantizer.py -------------------------------------------------------------------------------- /mapper/build_ep_database.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/mapper/build_ep_database.sh -------------------------------------------------------------------------------- /mapper/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/mapper/config_converter.py -------------------------------------------------------------------------------- /mapper/gguf_splitter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/mapper/gguf_splitter.py -------------------------------------------------------------------------------- /mapper/gguf_stitcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/mapper/gguf_stitcher.py -------------------------------------------------------------------------------- /quant/gguf/run_quant.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gguf/run_quant.sh -------------------------------------------------------------------------------- /quant/gptq/pack_compressed_tensors_into_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/pack_compressed_tensors_into_gguf.py -------------------------------------------------------------------------------- /quant/gptq/pack_gptq_into_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/pack_gptq_into_gguf.py -------------------------------------------------------------------------------- /quant/gptq/quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/quant.py -------------------------------------------------------------------------------- /quant/gptq/run_quant.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/run_quant.sh -------------------------------------------------------------------------------- /quant/gptq/src/common_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/common_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/data_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/dist_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/gptq.py -------------------------------------------------------------------------------- /quant/gptq/src/linalg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/linalg_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/metrics.py -------------------------------------------------------------------------------- /quant/gptq/src/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/model_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/packing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/packing_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/quant_utils.py -------------------------------------------------------------------------------- /quant/gptq/src/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IST-DASLab/gptq-gguf-toolkit/HEAD/quant/gptq/src/quantizer.py --------------------------------------------------------------------------------