├── .gitignore ├── LICENSE ├── README.md ├── assets └── qtip_overview.png ├── eval ├── eval_ppl.py ├── eval_zeroshot.py └── interactive_gen.py ├── example.sh ├── lib ├── __init__.py ├── algo │ ├── __init__.py │ ├── finetune.py │ └── ldlq.py ├── codebook │ ├── __init__.py │ └── bitshift.py ├── linear │ ├── __init__.py │ └── quantized_linear.py └── utils │ ├── __init__.py │ ├── data_utils.py │ ├── finetune.py │ ├── gptq_data_utils.py │ ├── graph_wrapper.py │ ├── kernel_check.py │ ├── kernel_decompress.py │ ├── math_utils.py │ ├── matmul_had.py │ ├── misc.py │ ├── shard_model.py │ └── unsafe_import.py ├── model ├── cache_utils.py └── llama.py ├── qtip-kernels ├── .gitignore ├── prof.sh ├── setup.py ├── src │ ├── Makefile │ ├── baseline.py │ ├── inference.cu │ ├── inference.h │ ├── qtip_torch.cu │ ├── test.cu │ └── wrapper.cpp └── test_decompress_matvec.py ├── quantize_llama ├── finetune_e2e_llama.py ├── hfize_llama.py ├── input_hessian_llama.py ├── manifest_model.py └── quantize_finetune_llama.py ├── requirements.txt └── scripts └── download_hf.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/README.md -------------------------------------------------------------------------------- /assets/qtip_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/assets/qtip_overview.png -------------------------------------------------------------------------------- /eval/eval_ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/eval/eval_ppl.py -------------------------------------------------------------------------------- /eval/eval_zeroshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/eval/eval_zeroshot.py -------------------------------------------------------------------------------- /eval/interactive_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/eval/interactive_gen.py -------------------------------------------------------------------------------- /example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/example.sh -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/algo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/algo/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/algo/finetune.py -------------------------------------------------------------------------------- /lib/algo/ldlq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/algo/ldlq.py -------------------------------------------------------------------------------- /lib/codebook/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/codebook/__init__.py -------------------------------------------------------------------------------- /lib/codebook/bitshift.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/codebook/bitshift.py -------------------------------------------------------------------------------- /lib/linear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/linear/__init__.py -------------------------------------------------------------------------------- /lib/linear/quantized_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/linear/quantized_linear.py -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/data_utils.py -------------------------------------------------------------------------------- /lib/utils/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/finetune.py -------------------------------------------------------------------------------- /lib/utils/gptq_data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/gptq_data_utils.py -------------------------------------------------------------------------------- /lib/utils/graph_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/graph_wrapper.py -------------------------------------------------------------------------------- /lib/utils/kernel_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/kernel_check.py -------------------------------------------------------------------------------- /lib/utils/kernel_decompress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/kernel_decompress.py -------------------------------------------------------------------------------- /lib/utils/math_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/math_utils.py -------------------------------------------------------------------------------- /lib/utils/matmul_had.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/matmul_had.py -------------------------------------------------------------------------------- /lib/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/misc.py -------------------------------------------------------------------------------- /lib/utils/shard_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/shard_model.py -------------------------------------------------------------------------------- /lib/utils/unsafe_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/lib/utils/unsafe_import.py -------------------------------------------------------------------------------- /model/cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/model/cache_utils.py -------------------------------------------------------------------------------- /model/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/model/llama.py -------------------------------------------------------------------------------- /qtip-kernels/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/.gitignore -------------------------------------------------------------------------------- /qtip-kernels/prof.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/prof.sh -------------------------------------------------------------------------------- /qtip-kernels/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/setup.py -------------------------------------------------------------------------------- /qtip-kernels/src/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/Makefile -------------------------------------------------------------------------------- /qtip-kernels/src/baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/baseline.py -------------------------------------------------------------------------------- /qtip-kernels/src/inference.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/inference.cu -------------------------------------------------------------------------------- /qtip-kernels/src/inference.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/inference.h -------------------------------------------------------------------------------- /qtip-kernels/src/qtip_torch.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/qtip_torch.cu -------------------------------------------------------------------------------- /qtip-kernels/src/test.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/test.cu -------------------------------------------------------------------------------- /qtip-kernels/src/wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/src/wrapper.cpp -------------------------------------------------------------------------------- /qtip-kernels/test_decompress_matvec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/qtip-kernels/test_decompress_matvec.py -------------------------------------------------------------------------------- /quantize_llama/finetune_e2e_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/quantize_llama/finetune_e2e_llama.py -------------------------------------------------------------------------------- /quantize_llama/hfize_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/quantize_llama/hfize_llama.py -------------------------------------------------------------------------------- /quantize_llama/input_hessian_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/quantize_llama/input_hessian_llama.py -------------------------------------------------------------------------------- /quantize_llama/manifest_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/quantize_llama/manifest_model.py -------------------------------------------------------------------------------- /quantize_llama/quantize_finetune_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/quantize_llama/quantize_finetune_llama.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/download_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cornell-RelaxML/qtip/HEAD/scripts/download_hf.py --------------------------------------------------------------------------------