├── .gitignore ├── LICENSE ├── README.md ├── configs └── quantization │ ├── llama2 │ ├── awq_llama2.yml │ ├── tesseraq_w2_L2_13b.yml │ ├── tesseraq_w2_L2_70b.yml │ ├── tesseraq_w2_L2_7b.yml │ ├── tesseraq_w2g128_L2_13b.yml │ ├── tesseraq_w2g128_L2_70b.yml │ ├── tesseraq_w2g128_L2_7b.yml │ ├── tesseraq_w2g64_L2_13b.yml │ ├── tesseraq_w2g64_L2_70b.yml │ ├── tesseraq_w2g64_L2_7b.yml │ ├── tesseraq_w3_L2_13b.yml │ ├── tesseraq_w3_L2_70b.yml │ ├── tesseraq_w3_L2_7b.yml │ ├── tesseraq_w3g128_L2_13b.yml │ ├── tesseraq_w3g128_L2_70b.yml │ ├── tesseraq_w3g128_L2_7b.yml │ ├── tesseraq_w4_L2_13b.yml │ ├── tesseraq_w4_L2_70b.yml │ └── tesseraq_w4_L2_7b.yml │ ├── llama3_1 │ ├── tesseraq_w2g128_L31_70b.yml │ ├── tesseraq_w2g128_L31_8b.yml │ ├── tesseraq_w3g128_L31_70b.yml │ └── tesseraq_w3g128_L31_8b.yml │ ├── llama3_2 │ ├── awq_w2g128_L32_1b.yml │ ├── awq_w2g128_L32_3b.yml │ ├── awq_w3g128_L32_1b.yml │ ├── awq_w3g128_L32_3b.yml │ ├── awq_w4g128_L32_1b.yml │ ├── awq_w4g128_L32_3b.yml │ ├── tesseraq_w2g128_L32_1b.yml │ ├── tesseraq_w2g128_L32_3b.yml │ ├── tesseraq_w3g128_L32_1b.yml │ ├── tesseraq_w3g128_L32_3b.yml │ ├── tesseraq_w4g128_L32_1b.yml │ └── tesseraq_w4g128_L32_3b.yml │ └── wa_quant │ ├── awq_llama2_w4a4.yml │ ├── tesseraq_w4a4_L2_7b.yml │ ├── tesseraq_w6a6_L2_7b.yml │ └── tesseraq_w8a8_L2_7b.yml ├── imgs └── tesseraq.png ├── llmc ├── __main__.py ├── compression │ ├── __init__.py │ ├── blockwise_optimization.py │ ├── quantization │ │ ├── __init__.py │ │ ├── adadim.py │ │ ├── awq.py │ │ ├── base_blockwise_quantization.py │ │ ├── dgq.py │ │ ├── gptq.py │ │ ├── hadamard_utils.py │ │ ├── hqq.py │ │ ├── llmint8.py │ │ ├── module_utils.py │ │ ├── ntweak.py │ │ ├── omniq.py │ │ ├── osplus.py │ │ ├── quant.py │ │ ├── quarot.py │ │ ├── quik.py │ │ ├── rtn.py │ │ ├── smoothquant.py │ │ ├── spqr.py │ │ ├── tesseraq.py │ │ ├── train_utils.py │ │ └── utils.py │ └── sparsification │ │ ├── __init__.py │ │ ├── base_blockwise_sparsification.py │ │ ├── magnitude.py │ │ ├── shortgpt.py │ │ ├── sparse.py │ │ └── wanda.py ├── data │ ├── __init__.py │ ├── dataset │ │ ├── __init__.py │ │ ├── base_dataset.py │ │ └── specified_preproc.py │ └── tokenizer │ │ ├── __init__.py │ │ └── base_tokenizer.py ├── eval │ ├── __init__.py │ ├── eval_acc.py │ ├── eval_base.py │ ├── eval_ppl.py │ └── eval_token_consist.py ├── models │ ├── __init__.py │ ├── base_model.py │ ├── bloom.py │ ├── deepseekv2.py │ ├── falcon.py │ ├── gemma2.py │ ├── internlm2.py │ ├── internomni.py │ ├── internvl2.py │ ├── llama.py │ ├── llava.py │ ├── minicpm.py │ ├── mistral.py │ ├── mixtral.py │ ├── opt.py │ ├── phi.py │ ├── qwen.py │ ├── qwen2.py │ ├── qwen2moe.py │ ├── qwenvl.py │ ├── smollm.py │ ├── stablelm.py │ ├── starcoder.py │ └── vit.py └── utils │ ├── __init__.py │ ├── export_autoawq.py │ ├── export_trtllm.py │ ├── export_vllm.py │ ├── registry_factory.py │ └── utils.py ├── requirements.txt ├── requirements ├── docs.txt └── runtime.txt ├── scripts ├── run_awq_llama.sh ├── run_llama_32.sh └── run_tesseraq_llama.sh └── tools ├── attention_vis.py ├── download_calib_dataset.py ├── download_eval_dataset.py ├── llm_eval.py └── quant_analysis.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/README.md -------------------------------------------------------------------------------- /configs/quantization/llama2/awq_llama2.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/awq_llama2.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2_L2_13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2_L2_13b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2_L2_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2_L2_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2g128_L2_13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2g128_L2_13b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2g128_L2_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2g128_L2_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2g128_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2g128_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2g64_L2_13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2g64_L2_13b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2g64_L2_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2g64_L2_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w2g64_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w2g64_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w3_L2_13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w3_L2_13b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w3_L2_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w3_L2_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w3_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w3_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w3g128_L2_13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w3g128_L2_13b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w3g128_L2_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w3g128_L2_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w3g128_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w3g128_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w4_L2_13b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w4_L2_13b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w4_L2_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w4_L2_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama2/tesseraq_w4_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama2/tesseraq_w4_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_1/tesseraq_w2g128_L31_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_1/tesseraq_w2g128_L31_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_1/tesseraq_w2g128_L31_8b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_1/tesseraq_w2g128_L31_8b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_1/tesseraq_w3g128_L31_70b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_1/tesseraq_w3g128_L31_70b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_1/tesseraq_w3g128_L31_8b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_1/tesseraq_w3g128_L31_8b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/awq_w2g128_L32_1b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/awq_w2g128_L32_1b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/awq_w2g128_L32_3b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/awq_w2g128_L32_3b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/awq_w3g128_L32_1b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/awq_w3g128_L32_1b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/awq_w3g128_L32_3b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/awq_w3g128_L32_3b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/awq_w4g128_L32_1b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/awq_w4g128_L32_1b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/awq_w4g128_L32_3b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/awq_w4g128_L32_3b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/tesseraq_w2g128_L32_1b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/tesseraq_w2g128_L32_1b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/tesseraq_w2g128_L32_3b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/tesseraq_w2g128_L32_3b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/tesseraq_w3g128_L32_1b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/tesseraq_w3g128_L32_1b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/tesseraq_w3g128_L32_3b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/tesseraq_w3g128_L32_3b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/tesseraq_w4g128_L32_1b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/tesseraq_w4g128_L32_1b.yml -------------------------------------------------------------------------------- /configs/quantization/llama3_2/tesseraq_w4g128_L32_3b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/llama3_2/tesseraq_w4g128_L32_3b.yml -------------------------------------------------------------------------------- /configs/quantization/wa_quant/awq_llama2_w4a4.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/wa_quant/awq_llama2_w4a4.yml -------------------------------------------------------------------------------- /configs/quantization/wa_quant/tesseraq_w4a4_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/wa_quant/tesseraq_w4a4_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/wa_quant/tesseraq_w6a6_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/wa_quant/tesseraq_w6a6_L2_7b.yml -------------------------------------------------------------------------------- /configs/quantization/wa_quant/tesseraq_w8a8_L2_7b.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/configs/quantization/wa_quant/tesseraq_w8a8_L2_7b.yml -------------------------------------------------------------------------------- /imgs/tesseraq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/imgs/tesseraq.png -------------------------------------------------------------------------------- /llmc/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/__main__.py -------------------------------------------------------------------------------- /llmc/compression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/__init__.py -------------------------------------------------------------------------------- /llmc/compression/blockwise_optimization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/blockwise_optimization.py -------------------------------------------------------------------------------- /llmc/compression/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/__init__.py -------------------------------------------------------------------------------- /llmc/compression/quantization/adadim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/adadim.py -------------------------------------------------------------------------------- /llmc/compression/quantization/awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/awq.py -------------------------------------------------------------------------------- /llmc/compression/quantization/base_blockwise_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/base_blockwise_quantization.py -------------------------------------------------------------------------------- /llmc/compression/quantization/dgq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/dgq.py -------------------------------------------------------------------------------- /llmc/compression/quantization/gptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/gptq.py -------------------------------------------------------------------------------- /llmc/compression/quantization/hadamard_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/hadamard_utils.py -------------------------------------------------------------------------------- /llmc/compression/quantization/hqq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/hqq.py -------------------------------------------------------------------------------- /llmc/compression/quantization/llmint8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/llmint8.py -------------------------------------------------------------------------------- /llmc/compression/quantization/module_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/module_utils.py -------------------------------------------------------------------------------- /llmc/compression/quantization/ntweak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/ntweak.py -------------------------------------------------------------------------------- /llmc/compression/quantization/omniq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/omniq.py -------------------------------------------------------------------------------- /llmc/compression/quantization/osplus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/osplus.py -------------------------------------------------------------------------------- /llmc/compression/quantization/quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/quant.py -------------------------------------------------------------------------------- /llmc/compression/quantization/quarot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/quarot.py -------------------------------------------------------------------------------- /llmc/compression/quantization/quik.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/quik.py -------------------------------------------------------------------------------- /llmc/compression/quantization/rtn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/rtn.py -------------------------------------------------------------------------------- /llmc/compression/quantization/smoothquant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/smoothquant.py -------------------------------------------------------------------------------- /llmc/compression/quantization/spqr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/spqr.py -------------------------------------------------------------------------------- /llmc/compression/quantization/tesseraq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/tesseraq.py -------------------------------------------------------------------------------- /llmc/compression/quantization/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/train_utils.py -------------------------------------------------------------------------------- /llmc/compression/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/quantization/utils.py -------------------------------------------------------------------------------- /llmc/compression/sparsification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/sparsification/__init__.py -------------------------------------------------------------------------------- /llmc/compression/sparsification/base_blockwise_sparsification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/sparsification/base_blockwise_sparsification.py -------------------------------------------------------------------------------- /llmc/compression/sparsification/magnitude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/sparsification/magnitude.py -------------------------------------------------------------------------------- /llmc/compression/sparsification/shortgpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/sparsification/shortgpt.py -------------------------------------------------------------------------------- /llmc/compression/sparsification/sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/sparsification/sparse.py -------------------------------------------------------------------------------- /llmc/compression/sparsification/wanda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/compression/sparsification/wanda.py -------------------------------------------------------------------------------- /llmc/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/data/__init__.py -------------------------------------------------------------------------------- /llmc/data/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/data/dataset/__init__.py -------------------------------------------------------------------------------- /llmc/data/dataset/base_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/data/dataset/base_dataset.py -------------------------------------------------------------------------------- /llmc/data/dataset/specified_preproc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/data/dataset/specified_preproc.py -------------------------------------------------------------------------------- /llmc/data/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/data/tokenizer/__init__.py -------------------------------------------------------------------------------- /llmc/data/tokenizer/base_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/data/tokenizer/base_tokenizer.py -------------------------------------------------------------------------------- /llmc/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/eval/__init__.py -------------------------------------------------------------------------------- /llmc/eval/eval_acc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/eval/eval_acc.py -------------------------------------------------------------------------------- /llmc/eval/eval_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/eval/eval_base.py -------------------------------------------------------------------------------- /llmc/eval/eval_ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/eval/eval_ppl.py -------------------------------------------------------------------------------- /llmc/eval/eval_token_consist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/eval/eval_token_consist.py -------------------------------------------------------------------------------- /llmc/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/__init__.py -------------------------------------------------------------------------------- /llmc/models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/base_model.py -------------------------------------------------------------------------------- /llmc/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/bloom.py -------------------------------------------------------------------------------- /llmc/models/deepseekv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/deepseekv2.py -------------------------------------------------------------------------------- /llmc/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/falcon.py -------------------------------------------------------------------------------- /llmc/models/gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/gemma2.py -------------------------------------------------------------------------------- /llmc/models/internlm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/internlm2.py -------------------------------------------------------------------------------- /llmc/models/internomni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/internomni.py -------------------------------------------------------------------------------- /llmc/models/internvl2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/internvl2.py -------------------------------------------------------------------------------- /llmc/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/llama.py -------------------------------------------------------------------------------- /llmc/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/llava.py -------------------------------------------------------------------------------- /llmc/models/minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/minicpm.py -------------------------------------------------------------------------------- /llmc/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/mistral.py -------------------------------------------------------------------------------- /llmc/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/mixtral.py -------------------------------------------------------------------------------- /llmc/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/opt.py -------------------------------------------------------------------------------- /llmc/models/phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/phi.py -------------------------------------------------------------------------------- /llmc/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/qwen.py -------------------------------------------------------------------------------- /llmc/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/qwen2.py -------------------------------------------------------------------------------- /llmc/models/qwen2moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/qwen2moe.py -------------------------------------------------------------------------------- /llmc/models/qwenvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/qwenvl.py -------------------------------------------------------------------------------- /llmc/models/smollm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/smollm.py -------------------------------------------------------------------------------- /llmc/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/stablelm.py -------------------------------------------------------------------------------- /llmc/models/starcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/starcoder.py -------------------------------------------------------------------------------- /llmc/models/vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/models/vit.py -------------------------------------------------------------------------------- /llmc/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/utils/__init__.py -------------------------------------------------------------------------------- /llmc/utils/export_autoawq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/utils/export_autoawq.py -------------------------------------------------------------------------------- /llmc/utils/export_trtllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/utils/export_trtllm.py -------------------------------------------------------------------------------- /llmc/utils/export_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/utils/export_vllm.py -------------------------------------------------------------------------------- /llmc/utils/registry_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/utils/registry_factory.py -------------------------------------------------------------------------------- /llmc/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/llmc/utils/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/runtime.txt 2 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/requirements/docs.txt -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/requirements/runtime.txt -------------------------------------------------------------------------------- /scripts/run_awq_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/scripts/run_awq_llama.sh -------------------------------------------------------------------------------- /scripts/run_llama_32.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/scripts/run_llama_32.sh -------------------------------------------------------------------------------- /scripts/run_tesseraq_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/scripts/run_tesseraq_llama.sh -------------------------------------------------------------------------------- /tools/attention_vis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/tools/attention_vis.py -------------------------------------------------------------------------------- /tools/download_calib_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/tools/download_calib_dataset.py -------------------------------------------------------------------------------- /tools/download_eval_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/tools/download_eval_dataset.py -------------------------------------------------------------------------------- /tools/llm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/tools/llm_eval.py -------------------------------------------------------------------------------- /tools/quant_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Intelligent-Computing-Lab-Panda/TesseraQ/HEAD/tools/quant_analysis.py --------------------------------------------------------------------------------