├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── act_scales └── README.md ├── assets └── SmoothQuant.pdf ├── examples ├── export_int8_model.py ├── generate_act_scales.py ├── ppl_eval.sh ├── smoothquant_llama_demo.ipynb ├── smoothquant_opt_demo.ipynb └── smoothquant_opt_real_int8_demo.ipynb ├── figures ├── accuracy.png ├── ft_latency_mem.png ├── intuition.png ├── migrate.jpg ├── quantization_flow.png └── torch_latency_mem.png ├── setup.py └── smoothquant ├── __init__.py ├── calibration.py ├── fake_quant.py ├── opt.py ├── ppl_eval.py └── smooth.py /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/README.md -------------------------------------------------------------------------------- /act_scales/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/act_scales/README.md -------------------------------------------------------------------------------- /assets/SmoothQuant.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/assets/SmoothQuant.pdf -------------------------------------------------------------------------------- /examples/export_int8_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/examples/export_int8_model.py -------------------------------------------------------------------------------- /examples/generate_act_scales.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/examples/generate_act_scales.py -------------------------------------------------------------------------------- /examples/ppl_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/examples/ppl_eval.sh -------------------------------------------------------------------------------- /examples/smoothquant_llama_demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/examples/smoothquant_llama_demo.ipynb -------------------------------------------------------------------------------- /examples/smoothquant_opt_demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/examples/smoothquant_opt_demo.ipynb -------------------------------------------------------------------------------- /examples/smoothquant_opt_real_int8_demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/examples/smoothquant_opt_real_int8_demo.ipynb -------------------------------------------------------------------------------- /figures/accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/figures/accuracy.png -------------------------------------------------------------------------------- /figures/ft_latency_mem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/figures/ft_latency_mem.png -------------------------------------------------------------------------------- /figures/intuition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/figures/intuition.png -------------------------------------------------------------------------------- /figures/migrate.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/figures/migrate.jpg -------------------------------------------------------------------------------- /figures/quantization_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/figures/quantization_flow.png -------------------------------------------------------------------------------- /figures/torch_latency_mem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/figures/torch_latency_mem.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/setup.py -------------------------------------------------------------------------------- /smoothquant/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /smoothquant/calibration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/smoothquant/calibration.py -------------------------------------------------------------------------------- /smoothquant/fake_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/smoothquant/fake_quant.py -------------------------------------------------------------------------------- /smoothquant/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/smoothquant/opt.py -------------------------------------------------------------------------------- /smoothquant/ppl_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/smoothquant/ppl_eval.py -------------------------------------------------------------------------------- /smoothquant/smooth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-han-lab/smoothquant/HEAD/smoothquant/smooth.py --------------------------------------------------------------------------------