├── .github └── images │ ├── compute_path.png │ ├── e2e_benchmarks.jpg │ └── kernel_benchmarks.jpg ├── .gitignore ├── LICENSE ├── README.md ├── csrc ├── dequantize_quick.cuh ├── gemm_cuda_quick.cu ├── gemm_cuda_quick.h └── pybind.cpp ├── examples ├── basic_quant.py ├── benchmark.py └── eval.py ├── quick └── awq │ ├── __init__.py │ ├── evaluation │ ├── __init__.py │ ├── eval_utils.py │ ├── humaneval_utils.py │ └── kl_divergence.py │ ├── models │ ├── __init__.py │ ├── _config.py │ ├── aquila.py │ ├── auto.py │ ├── baichuan.py │ ├── base.py │ ├── bloom.py │ ├── falcon.py │ ├── gpt_bigcode.py │ ├── gpt_neox.py │ ├── gptj.py │ ├── llama.py │ ├── llava.py │ ├── mistral.py │ ├── mixtral.py │ ├── mpt.py │ ├── opt.py │ ├── qwen.py │ └── yi.py │ ├── modules │ ├── __init__.py │ ├── act.py │ ├── fused │ │ ├── __init__.py │ │ ├── attn.py │ │ ├── block.py │ │ ├── cache.py │ │ ├── mlp.py │ │ ├── model.py │ │ └── norm.py │ └── linear │ │ ├── __init__.py │ │ ├── exllama.py │ │ ├── exllamav2.py │ │ ├── gemm.py │ │ ├── gemv.py │ │ └── quick.py │ ├── quantize │ ├── __init__.py │ ├── quantizer.py │ └── scale.py │ └── utils │ ├── __init__.py │ ├── calib_data.py │ ├── fused_utils.py │ ├── module.py │ ├── packing_utils.py │ ├── parallel.py │ └── utils.py └── setup.py /.github/images/compute_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/.github/images/compute_path.png -------------------------------------------------------------------------------- /.github/images/e2e_benchmarks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/.github/images/e2e_benchmarks.jpg -------------------------------------------------------------------------------- /.github/images/kernel_benchmarks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/.github/images/kernel_benchmarks.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/README.md -------------------------------------------------------------------------------- /csrc/dequantize_quick.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/csrc/dequantize_quick.cuh -------------------------------------------------------------------------------- /csrc/gemm_cuda_quick.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/csrc/gemm_cuda_quick.cu -------------------------------------------------------------------------------- /csrc/gemm_cuda_quick.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/csrc/gemm_cuda_quick.h -------------------------------------------------------------------------------- /csrc/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/csrc/pybind.cpp -------------------------------------------------------------------------------- /examples/basic_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/examples/basic_quant.py -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/examples/benchmark.py -------------------------------------------------------------------------------- /examples/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/examples/eval.py -------------------------------------------------------------------------------- /quick/awq/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.8" 2 | from quick.awq.models.auto import AutoAWQForCausalLM -------------------------------------------------------------------------------- /quick/awq/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/evaluation/__init__.py -------------------------------------------------------------------------------- /quick/awq/evaluation/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/evaluation/eval_utils.py -------------------------------------------------------------------------------- /quick/awq/evaluation/humaneval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/evaluation/humaneval_utils.py -------------------------------------------------------------------------------- /quick/awq/evaluation/kl_divergence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/evaluation/kl_divergence.py -------------------------------------------------------------------------------- /quick/awq/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/__init__.py -------------------------------------------------------------------------------- /quick/awq/models/_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/_config.py -------------------------------------------------------------------------------- /quick/awq/models/aquila.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/aquila.py -------------------------------------------------------------------------------- /quick/awq/models/auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/auto.py -------------------------------------------------------------------------------- /quick/awq/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/baichuan.py -------------------------------------------------------------------------------- /quick/awq/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/base.py -------------------------------------------------------------------------------- /quick/awq/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/bloom.py -------------------------------------------------------------------------------- /quick/awq/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/falcon.py -------------------------------------------------------------------------------- /quick/awq/models/gpt_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/gpt_bigcode.py -------------------------------------------------------------------------------- /quick/awq/models/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/gpt_neox.py -------------------------------------------------------------------------------- /quick/awq/models/gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/gptj.py -------------------------------------------------------------------------------- /quick/awq/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/llama.py -------------------------------------------------------------------------------- /quick/awq/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/llava.py -------------------------------------------------------------------------------- /quick/awq/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/mistral.py -------------------------------------------------------------------------------- /quick/awq/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/mixtral.py -------------------------------------------------------------------------------- /quick/awq/models/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/mpt.py -------------------------------------------------------------------------------- /quick/awq/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/opt.py -------------------------------------------------------------------------------- /quick/awq/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/qwen.py -------------------------------------------------------------------------------- /quick/awq/models/yi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/models/yi.py -------------------------------------------------------------------------------- /quick/awq/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /quick/awq/modules/act.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/act.py -------------------------------------------------------------------------------- /quick/awq/modules/fused/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /quick/awq/modules/fused/attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/fused/attn.py -------------------------------------------------------------------------------- /quick/awq/modules/fused/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/fused/block.py -------------------------------------------------------------------------------- /quick/awq/modules/fused/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/fused/cache.py -------------------------------------------------------------------------------- /quick/awq/modules/fused/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/fused/mlp.py -------------------------------------------------------------------------------- /quick/awq/modules/fused/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/fused/model.py -------------------------------------------------------------------------------- /quick/awq/modules/fused/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/fused/norm.py -------------------------------------------------------------------------------- /quick/awq/modules/linear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/linear/__init__.py -------------------------------------------------------------------------------- /quick/awq/modules/linear/exllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/linear/exllama.py -------------------------------------------------------------------------------- /quick/awq/modules/linear/exllamav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/linear/exllamav2.py -------------------------------------------------------------------------------- /quick/awq/modules/linear/gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/linear/gemm.py -------------------------------------------------------------------------------- /quick/awq/modules/linear/gemv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/linear/gemv.py -------------------------------------------------------------------------------- /quick/awq/modules/linear/quick.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/modules/linear/quick.py -------------------------------------------------------------------------------- /quick/awq/quantize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /quick/awq/quantize/quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/quantize/quantizer.py -------------------------------------------------------------------------------- /quick/awq/quantize/scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/quantize/scale.py -------------------------------------------------------------------------------- /quick/awq/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /quick/awq/utils/calib_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/utils/calib_data.py -------------------------------------------------------------------------------- /quick/awq/utils/fused_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/utils/fused_utils.py -------------------------------------------------------------------------------- /quick/awq/utils/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/utils/module.py -------------------------------------------------------------------------------- /quick/awq/utils/packing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/utils/packing_utils.py -------------------------------------------------------------------------------- /quick/awq/utils/parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/utils/parallel.py -------------------------------------------------------------------------------- /quick/awq/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/quick/awq/utils/utils.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SqueezeBits/QUICK/HEAD/setup.py --------------------------------------------------------------------------------