├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── REALQUANT.md ├── benchmarks ├── benchmark_lm_eval.py ├── benchmark_model.py ├── kernel_benchmark.py ├── layer_benchmark.py ├── qattention_benchmark.py └── qlinear_benchmark.py ├── deepseek_v3 ├── eval_utils.py ├── generate.py ├── kernel.py └── model.py ├── deploy ├── __init__.py ├── functional │ ├── __init__.py │ ├── online_trans.py │ └── quantization.py ├── kernels │ ├── bindings.cpp │ ├── block_matmul.py │ ├── flashinfer.cu │ ├── gemm.cu │ ├── include │ │ ├── common.h │ │ ├── flashinfer.h │ │ ├── flashinfer │ │ │ ├── cp_async.cuh │ │ │ ├── decode.cuh │ │ │ ├── layout.cuh │ │ │ ├── math.cuh │ │ │ ├── mma.cuh │ │ │ ├── page.cuh │ │ │ ├── permuted_smem.cuh │ │ │ ├── prefill.cuh │ │ │ ├── quantization.cuh │ │ │ ├── rope.cuh │ │ │ ├── state.cuh │ │ │ ├── utils.cuh │ │ │ └── vec_dtypes.cuh │ │ ├── gemm.h │ │ ├── int4.h │ │ ├── quant.h │ │ └── util.h │ ├── kron_matmul.py │ └── quant.cu ├── nn │ ├── __init__.py │ ├── linear.py │ ├── normalization.py │ ├── online_trans.py │ └── quantization.py └── transformers │ ├── __init__.py │ ├── kv_cache.py │ └── modeling_llama.py ├── figures ├── FlatQuant.jpg └── flatness.jpg ├── flatquant ├── __init__.py ├── args_utils.py ├── data_utils.py ├── eval_utils.py ├── flat_linear.py ├── flat_utils.py ├── flatness.py ├── function_utils.py ├── hadamard_utils.py ├── model_tools │ ├── __init__.py │ ├── deepseekv3_utils.py │ ├── llama31_utils.py │ ├── llama_utils.py │ └── qwen_utils.py ├── model_utils.py ├── quant_utils.py ├── train_utils.py ├── trans_utils.py └── utils.py ├── get_snapshot_dir.py ├── gptq_utils.py ├── main.py ├── main_dpskv3.py ├── plot_flatness.py ├── requirements.txt ├── requirements_llama2.txt ├── scripts ├── deepseek │ ├── deepseek-r1 │ │ └── w4a4kv4.sh │ └── deepseek-v3 │ │ └── w4a4kv4.sh ├── llama-2 │ ├── llama-2-13b │ │ └── w4a4kv4.sh │ ├── llama-2-70b │ │ └── w4a4kv4.sh │ └── llama-2-7b │ │ └── w4a4kv4.sh ├── llama-3.1-instruct │ └── llama-3.1-instruct-8b │ │ └── w4a4kv4.sh ├── llama-3 │ ├── llama-3-70b │ │ └── w4a4kv4.sh │ └── llama-3-8b │ │ └── w4a4kv4.sh └── qwen-2.5-instruct │ ├── qwen-2.5-instruct-32b │ └── w4a4kv4.sh │ └── qwen-2.5-instruct-7b │ └── w4a4kv4.sh ├── setup.py └── vllm_custom └── model_executor ├── fake_quantized_models ├── llama_fake_quantized.py ├── llama_flatquant.py ├── qwen2_fake_quantized.py ├── qwen2_flatquant.py └── registry.py └── layers └── quantization └── utils ├── fake_quant_utils.py └── flatquant_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/.gitmodules -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/README.md -------------------------------------------------------------------------------- /REALQUANT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/REALQUANT.md -------------------------------------------------------------------------------- /benchmarks/benchmark_lm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/benchmarks/benchmark_lm_eval.py -------------------------------------------------------------------------------- /benchmarks/benchmark_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/benchmarks/benchmark_model.py -------------------------------------------------------------------------------- /benchmarks/kernel_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/benchmarks/kernel_benchmark.py -------------------------------------------------------------------------------- /benchmarks/layer_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/benchmarks/layer_benchmark.py -------------------------------------------------------------------------------- /benchmarks/qattention_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/benchmarks/qattention_benchmark.py -------------------------------------------------------------------------------- /benchmarks/qlinear_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/benchmarks/qlinear_benchmark.py -------------------------------------------------------------------------------- /deepseek_v3/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deepseek_v3/eval_utils.py -------------------------------------------------------------------------------- /deepseek_v3/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deepseek_v3/generate.py -------------------------------------------------------------------------------- /deepseek_v3/kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deepseek_v3/kernel.py -------------------------------------------------------------------------------- /deepseek_v3/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deepseek_v3/model.py -------------------------------------------------------------------------------- /deploy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/__init__.py -------------------------------------------------------------------------------- /deploy/functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/functional/__init__.py -------------------------------------------------------------------------------- /deploy/functional/online_trans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/functional/online_trans.py -------------------------------------------------------------------------------- /deploy/functional/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/functional/quantization.py -------------------------------------------------------------------------------- /deploy/kernels/bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/bindings.cpp -------------------------------------------------------------------------------- /deploy/kernels/block_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/block_matmul.py -------------------------------------------------------------------------------- /deploy/kernels/flashinfer.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/flashinfer.cu -------------------------------------------------------------------------------- /deploy/kernels/gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/gemm.cu -------------------------------------------------------------------------------- /deploy/kernels/include/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/common.h -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer.h -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/cp_async.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/cp_async.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/decode.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/decode.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/layout.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/math.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/math.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/mma.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/page.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/page.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/permuted_smem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/permuted_smem.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/prefill.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/prefill.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/quantization.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/quantization.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/rope.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/rope.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/state.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/state.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/utils.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/flashinfer/vec_dtypes.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/flashinfer/vec_dtypes.cuh -------------------------------------------------------------------------------- /deploy/kernels/include/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/gemm.h -------------------------------------------------------------------------------- /deploy/kernels/include/int4.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/int4.h -------------------------------------------------------------------------------- /deploy/kernels/include/quant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/quant.h -------------------------------------------------------------------------------- /deploy/kernels/include/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/include/util.h -------------------------------------------------------------------------------- /deploy/kernels/kron_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/kron_matmul.py -------------------------------------------------------------------------------- /deploy/kernels/quant.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/kernels/quant.cu -------------------------------------------------------------------------------- /deploy/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/nn/__init__.py -------------------------------------------------------------------------------- /deploy/nn/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/nn/linear.py -------------------------------------------------------------------------------- /deploy/nn/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/nn/normalization.py -------------------------------------------------------------------------------- /deploy/nn/online_trans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/nn/online_trans.py -------------------------------------------------------------------------------- /deploy/nn/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/nn/quantization.py -------------------------------------------------------------------------------- /deploy/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/transformers/__init__.py -------------------------------------------------------------------------------- /deploy/transformers/kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/transformers/kv_cache.py -------------------------------------------------------------------------------- /deploy/transformers/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/deploy/transformers/modeling_llama.py -------------------------------------------------------------------------------- /figures/FlatQuant.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/figures/FlatQuant.jpg -------------------------------------------------------------------------------- /figures/flatness.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/figures/flatness.jpg -------------------------------------------------------------------------------- /flatquant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flatquant/args_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/args_utils.py -------------------------------------------------------------------------------- /flatquant/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/data_utils.py -------------------------------------------------------------------------------- /flatquant/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/eval_utils.py -------------------------------------------------------------------------------- /flatquant/flat_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/flat_linear.py -------------------------------------------------------------------------------- /flatquant/flat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/flat_utils.py -------------------------------------------------------------------------------- /flatquant/flatness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/flatness.py -------------------------------------------------------------------------------- /flatquant/function_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/function_utils.py -------------------------------------------------------------------------------- /flatquant/hadamard_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/hadamard_utils.py -------------------------------------------------------------------------------- /flatquant/model_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flatquant/model_tools/deepseekv3_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/model_tools/deepseekv3_utils.py -------------------------------------------------------------------------------- /flatquant/model_tools/llama31_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/model_tools/llama31_utils.py -------------------------------------------------------------------------------- /flatquant/model_tools/llama_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/model_tools/llama_utils.py -------------------------------------------------------------------------------- /flatquant/model_tools/qwen_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/model_tools/qwen_utils.py -------------------------------------------------------------------------------- /flatquant/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/model_utils.py -------------------------------------------------------------------------------- /flatquant/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/quant_utils.py -------------------------------------------------------------------------------- /flatquant/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/train_utils.py -------------------------------------------------------------------------------- /flatquant/trans_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/trans_utils.py -------------------------------------------------------------------------------- /flatquant/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/flatquant/utils.py -------------------------------------------------------------------------------- /get_snapshot_dir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/get_snapshot_dir.py -------------------------------------------------------------------------------- /gptq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/gptq_utils.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/main.py -------------------------------------------------------------------------------- /main_dpskv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/main_dpskv3.py -------------------------------------------------------------------------------- /plot_flatness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/plot_flatness.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_llama2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/requirements_llama2.txt -------------------------------------------------------------------------------- /scripts/deepseek/deepseek-r1/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/deepseek/deepseek-r1/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/deepseek/deepseek-v3/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/deepseek/deepseek-v3/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/llama-2/llama-2-13b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/llama-2/llama-2-13b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/llama-2/llama-2-70b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/llama-2/llama-2-70b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/llama-2/llama-2-7b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/llama-2/llama-2-7b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/llama-3.1-instruct/llama-3.1-instruct-8b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/llama-3.1-instruct/llama-3.1-instruct-8b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/llama-3/llama-3-70b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/llama-3/llama-3-70b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/llama-3/llama-3-8b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/llama-3/llama-3-8b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/qwen-2.5-instruct/qwen-2.5-instruct-32b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/qwen-2.5-instruct/qwen-2.5-instruct-32b/w4a4kv4.sh -------------------------------------------------------------------------------- /scripts/qwen-2.5-instruct/qwen-2.5-instruct-7b/w4a4kv4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/scripts/qwen-2.5-instruct/qwen-2.5-instruct-7b/w4a4kv4.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/setup.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/fake_quantized_models/llama_fake_quantized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/fake_quantized_models/llama_fake_quantized.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/fake_quantized_models/llama_flatquant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/fake_quantized_models/llama_flatquant.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/fake_quantized_models/qwen2_fake_quantized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/fake_quantized_models/qwen2_fake_quantized.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/fake_quantized_models/qwen2_flatquant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/fake_quantized_models/qwen2_flatquant.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/fake_quantized_models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/fake_quantized_models/registry.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/layers/quantization/utils/fake_quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/layers/quantization/utils/fake_quant_utils.py -------------------------------------------------------------------------------- /vllm_custom/model_executor/layers/quantization/utils/flatquant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruikangliu/FlatQuant/HEAD/vllm_custom/model_executor/layers/quantization/utils/flatquant_utils.py --------------------------------------------------------------------------------