├── .MAINTAINERS ├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── doc-edit.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── TODO.txt ├── actions │ └── prepare-code-coverage │ │ └── action.yml └── workflows │ ├── build-and-publish-release-images.yaml │ ├── linkcheck.yml │ ├── linkspector │ └── linkspector.yml │ ├── quality-check.yaml │ ├── ready-label-check.yaml │ ├── result.xml.fail │ ├── result.xml.success │ ├── set-comment.yaml │ ├── test-check-transformers.yaml │ └── test-check.yaml ├── .gitignore ├── .readthedocs.yaml ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DEVELOPING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── NOTICE ├── README.md ├── docs ├── Makefile ├── README.md ├── assets │ ├── llmcompressor-icon-white.png │ ├── llmcompressor-icon.png │ └── llmcompressor-user-flows.png ├── developer │ ├── code-of-conduct.md │ ├── contributing.md │ ├── developing.md │ └── index.md ├── examples │ └── index.md ├── getting-started │ ├── compress.md │ ├── deploy.md │ ├── faq.md │ ├── index.md │ └── install.md ├── guides │ ├── compression_formats.md │ ├── compression_schemes.md │ ├── index.md │ └── saving_a_model.md ├── index.md ├── observers.md ├── scripts │ ├── __init__.py │ ├── gen_files.py │ └── mathjax.js └── stylesheets │ └── style.css ├── examples ├── autoround │ ├── README.md │ └── llama3_example.py ├── awq │ ├── README.md │ ├── llama_example.py │ ├── qwen3-vl-30b-a3b-Instruct-example.py │ ├── qwen3_coder_moe_example.py │ └── qwen3_moe_example.py ├── big_models_with_sequential_onloading │ ├── README.md │ ├── assets │ │ └── sequential_onloading.png │ └── llama3.3_70b.py ├── compressed_inference │ └── fp8_compressed_inference.py ├── finetuning │ ├── configure_fsdp.md │ ├── example_alternating_recipe.yaml │ ├── example_fsdp_config.yaml │ └── example_single_gpu_config.yaml ├── model_free_ptq │ ├── README.md │ └── kimi_k2_thinking_fp8_block.py ├── multimodal_audio │ ├── README.md │ └── whisper_example.py ├── multimodal_vision │ ├── README.md │ ├── README_internvl3.md │ ├── gemma3_example.py │ ├── idefics3_example.py │ ├── internvl3_example.py │ ├── llama4_example.py │ ├── llava_example.py │ ├── mistral3_chat_template.json │ ├── mistral3_example.py │ ├── mllama_example.py │ ├── phi3_vision_example.py │ ├── pixtral_example.py │ ├── qwen2_vl_example.py │ └── qwen_2_5_vl_example.py ├── quantization_2of4_sparse_w4a16 │ ├── 2of4_w4a16_group-128_recipe.yaml │ ├── 2of4_w4a16_recipe.yaml │ ├── README.md │ └── llama7b_sparse_w4a16.py ├── quantization_kv_cache │ ├── README.md │ ├── gemma2_fp8_kv_example.py │ ├── llama3_fp8_kv_example.py │ └── phi3.5_fp8_kv_example.py ├── quantization_non_uniform │ ├── README.md │ ├── quantization_fp8_multiple_strategies.py │ ├── quantization_int4_int8.py │ ├── quantization_multiple_modifiers.py │ └── quantization_nvfp4_fp8.py ├── quantization_w4a16 │ ├── README.md │ └── llama3_example.py ├── quantization_w4a16_fp4 │ ├── llama3_example.py │ └── qwen3_example.py ├── quantization_w4a4_fp4 │ ├── README.md │ ├── llama3_example.py │ ├── llama4_example.py │ ├── qwen3_next_example.py │ ├── qwen3_vl_moe_w4a4_fp4.py │ └── qwen_30b_a3b.py ├── quantization_w8a8_fp8 │ ├── README.md │ ├── README_granite4.md │ ├── fp8_block_example.py │ ├── gemma2_example.py │ ├── granite4_example.py │ ├── llama3.2_vision_example.py │ ├── llama3_example.py │ ├── llama4_fp8_block_example.py │ ├── llava1.5_example.py │ ├── qwen2vl_example.py │ ├── qwen3_next_example.py │ ├── qwen3_vl_moe_fp8_example.py │ ├── qwen_2_5_vl_example.py │ └── whisper_example.py ├── quantization_w8a8_int8 │ ├── README.md │ ├── gemma2_example.py │ └── llama3_example.py ├── quantizing_moe │ ├── README.md │ ├── deepseek_r1_example.py │ ├── mixtral_example.py │ └── qwen_example.py ├── sparse_2of4_quantization_fp8 │ ├── README.md │ └── llama3_8b_2of4.py └── transform │ ├── README.md │ ├── quip_example.py │ └── spinquant_example.py ├── experimental ├── README.md ├── llama3_attention.py ├── mistral │ ├── README.md │ └── fp8_quantize.py └── mxfp4 │ └── llama3_mxfp4.py ├── mkdocs.yml ├── pyproject.toml ├── setup.py ├── src └── llmcompressor │ ├── __init__.py │ ├── args │ ├── README.md │ ├── __init__.py │ ├── dataset_arguments.py │ ├── model_arguments.py │ ├── recipe_arguments.py │ └── utils.py │ ├── core │ ├── __init__.py │ ├── events │ │ ├── __init__.py │ │ └── event.py │ ├── helpers.py │ ├── lifecycle.py │ ├── model_layer.py │ ├── session.py │ ├── session_functions.py │ └── state.py │ ├── datasets │ ├── __init__.py │ └── utils.py │ ├── entrypoints │ ├── README.md │ ├── __init__.py │ ├── model_free │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── lifecycle.py │ │ ├── model_utils.py │ │ └── save_utils.py │ ├── oneshot.py │ └── utils.py │ ├── logger.py │ ├── metrics │ ├── __init__.py │ ├── logger.py │ └── utils │ │ ├── __init__.py │ │ └── frequency_manager.py │ ├── modeling │ ├── __init__.py │ ├── deepseek_v3.py │ ├── fuse.py │ ├── granite4.py │ ├── llama4.py │ ├── moe_context.py │ ├── prepare.py │ ├── qwen3_moe.py │ ├── qwen3_next_moe.py │ └── qwen3_vl_moe.py │ ├── modifiers │ ├── README.md │ ├── __init__.py │ ├── autoround │ │ ├── __init__.py │ │ └── base.py │ ├── awq │ │ ├── __init__.py │ │ ├── base.py │ │ └── mappings.py │ ├── experimental │ │ └── __init__.py │ ├── factory.py │ ├── interface.py │ ├── logarithmic_equalization │ │ ├── __init__.py │ │ └── base.py │ ├── modifier.py │ ├── obcq │ │ ├── __init__.py │ │ └── sgpt_base.py │ ├── pruning │ │ ├── __init__.py │ │ ├── constant │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── helpers.py │ │ ├── magnitude │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── sparsegpt │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── sgpt_base.py │ │ │ └── sgpt_sparsify.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ └── pytorch │ │ │ │ ├── __init__.py │ │ │ │ ├── layer_mask.py │ │ │ │ └── mask_factory.py │ │ └── wanda │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── wanda_sparsify.py │ ├── quantization │ │ ├── __init__.py │ │ ├── calibration.py │ │ ├── gptq │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── gptq_quantize.py │ │ └── quantization │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── mixin.py │ ├── smoothquant │ │ ├── README.md │ │ ├── __init__.py │ │ ├── base.py │ │ └── utils.py │ ├── transform │ │ ├── __init__.py │ │ ├── quip │ │ │ ├── __init__.py │ │ │ └── base.py │ │ └── spinquant │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── mappings.py │ │ │ └── norm_mappings.py │ └── utils │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── helpers.py │ │ ├── hooks.py │ │ └── pytorch_helpers.py │ ├── observers │ ├── __init__.py │ ├── base.py │ ├── helpers.py │ ├── min_max.py │ ├── moving_base.py │ └── mse.py │ ├── pipelines │ ├── __init__.py │ ├── basic │ │ ├── __init__.py │ │ └── pipeline.py │ ├── cache.py │ ├── data_free │ │ ├── __init__.py │ │ └── pipeline.py │ ├── independent │ │ ├── __init__.py │ │ └── pipeline.py │ ├── registry.py │ └── sequential │ │ ├── README.md │ │ ├── __init__.py │ │ ├── ast_helpers.py │ │ ├── ast_utils │ │ ├── auto_wrapper.py │ │ ├── control_flow_analyzer.py │ │ └── name_analyzer.py │ │ ├── helpers.py │ │ ├── pipeline.py │ │ └── transformers_helpers.py │ ├── pytorch │ ├── __init__.py │ ├── model_load │ │ ├── __init__.py │ │ └── helpers.py │ └── utils │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── sparsification.py │ │ └── sparsification_info │ │ ├── __init__.py │ │ ├── configs.py │ │ ├── helpers.py │ │ └── module_sparsification_info.py │ ├── recipe │ ├── __init__.py │ ├── metadata.py │ ├── recipe.py │ └── utils.py │ ├── sentinel.py │ ├── transformers │ ├── __init__.py │ ├── compression │ │ ├── __init__.py │ │ ├── compressed_tensors_utils.py │ │ ├── helpers.py │ │ └── sparsity_metadata_config.py │ ├── data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── c4.py │ │ ├── cnn_dailymail.py │ │ ├── custom.py │ │ ├── data_helpers.py │ │ ├── evolcodealpaca.py │ │ ├── flickr_30k.py │ │ ├── gsm8k.py │ │ ├── open_platypus.py │ │ ├── peoples_speech.py │ │ ├── ultrachat_200k.py │ │ └── wikitext.py │ ├── tracing │ │ ├── __init__.py │ │ └── debug.py │ └── utils │ │ ├── __init__.py │ │ ├── helpers.py │ │ └── preprocessing_functions.py │ ├── typing.py │ └── utils │ ├── __init__.py │ ├── dev.py │ ├── fsdp │ ├── __init__.py │ ├── context.py │ └── helpers.py │ ├── helpers.py │ ├── metric_logging.py │ ├── pytorch │ ├── __init__.py │ ├── module.py │ └── utils.py │ └── transformers.py ├── tests ├── __init__.py ├── e2e │ ├── __init__.py │ ├── e2e_utils.py │ └── vLLM │ │ ├── __init__.py │ │ ├── configs │ │ ├── fp4_nvfp4.yaml │ │ ├── fp4_nvfp4a16.yaml │ │ ├── fp8_block.yaml │ │ ├── fp8_dynamic_per_token.yaml │ │ ├── fp8_static_per_tensor.yaml │ │ ├── fp8_weight_only_channel.yaml │ │ ├── fp8_weight_only_tensor.yaml │ │ ├── int8_channel_weight_static_per_tensor_act.yaml │ │ ├── int8_dynamic_per_token.yaml │ │ ├── int8_tensor_weight_static_per_tensor_act.yaml │ │ ├── kv_cache_gptq_tinyllama.yaml │ │ ├── kv_cache_phi3.yaml │ │ ├── kv_cache_tinyllama.yaml │ │ ├── qwen3_fp4_nvfp4.yaml │ │ ├── qwen3_fp8_dynamic_per_token.yaml │ │ ├── sparse2of4_fp8_dynamic.yaml │ │ ├── sparse_24.yaml │ │ ├── w4a16_2of4_channel_quant.yaml │ │ ├── w4a16_2of4_grouped_quant.yaml │ │ ├── w4a16_actorder_group.yaml │ │ ├── w4a16_actorder_none.yaml │ │ ├── w4a16_actorder_weight.yaml │ │ ├── w4a16_channel_quant.yaml │ │ ├── w4a16_grouped_quant.yaml │ │ ├── w4a16_grouped_quant_asym_awq.yaml │ │ ├── w4a16_grouped_quant_sym_awq.yaml │ │ ├── w8a16_channel_quant.yaml │ │ ├── w8a16_grouped_quant.yaml │ │ ├── w8a8_dynamic_asym.yaml │ │ └── w8a8_static_asym.yaml │ │ ├── recipes │ │ ├── FP8 │ │ │ ├── recipe_fp8_dynamic.yaml │ │ │ ├── recipe_fp8_weight_only_channel.yaml │ │ │ └── recipe_fp8_weight_only_per_tensor.yaml │ │ ├── INT8 │ │ │ ├── recipe_int8_channel_weight_dynamic_per_token.yaml │ │ │ ├── recipe_int8_channel_weight_static_per_tensor_act.yaml │ │ │ ├── recipe_int8_tensor_weight_static_per_tensor_act.yaml │ │ │ ├── recipe_w8a8_dynamic_asym.yaml │ │ │ └── recipe_w8a8_static_asym.yaml │ │ ├── Sparse_2of4 │ │ │ ├── recipe_sparse_2of4.yaml │ │ │ └── recipe_sparse_2of4_fp8_dynamic.yaml │ │ ├── WNA16 │ │ │ ├── recipe_w4a16_awq_asym.yaml │ │ │ ├── recipe_w4a16_awq_sym.yaml │ │ │ ├── recipe_w4a16_channel_quant.yaml │ │ │ └── recipe_w8a16_channel_quant.yaml │ │ ├── WNA16_2of4 │ │ │ ├── 2of4_w4a16_group-128_recipe.yaml │ │ │ └── 2of4_w4a16_recipe.yaml │ │ ├── actorder │ │ │ ├── recipe_w4a16_actorder_group.yaml │ │ │ ├── recipe_w4a16_actorder_none.yaml │ │ │ └── recipe_w4a16_actorder_weight.yaml │ │ └── kv_cache │ │ │ ├── default.yaml │ │ │ └── gptq.yaml │ │ ├── rhaiis-e2e-smoke.list │ │ ├── run_tests_in_python.sh │ │ ├── run_tests_in_rhaiis.sh │ │ ├── run_vllm.py │ │ └── test_vllm.py ├── examples │ ├── __init__.py │ ├── test_example_scripts.py │ └── test_readmes.py ├── llmcompressor │ ├── __init__.py │ ├── conftest.py │ ├── helpers.py │ ├── metrics │ │ ├── __init__.py │ │ ├── test_logger.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── test_frequency_manager.py │ ├── modeling │ │ ├── test_calib_deepseek_v3.py │ │ ├── test_calib_llama4.py │ │ ├── test_calib_qwen3.py │ │ ├── test_calib_qwen3_next.py │ │ ├── test_calib_qwen3_vl_moe.py │ │ └── test_fuse.py │ ├── modifiers │ │ ├── __init__.py │ │ ├── awq │ │ │ ├── __init__.py │ │ │ └── test_base.py │ │ ├── calibration │ │ │ ├── __init__.py │ │ │ ├── test_frozen.py │ │ │ ├── test_lifecycle.py │ │ │ └── test_observers.py │ │ ├── conf.py │ │ ├── logarithmic_equalization │ │ │ ├── __init__.py │ │ │ └── test_base.py │ │ ├── pruning │ │ │ ├── __init__.py │ │ │ ├── sparsegpt │ │ │ │ ├── __init__.py │ │ │ │ └── test_base.py │ │ │ └── wanda │ │ │ │ ├── __init__.py │ │ │ │ └── test_base.py │ │ ├── quantization │ │ │ ├── __init__.py │ │ │ ├── test_base.py │ │ │ └── test_handling_shared_embeddings.py │ │ ├── smoothquant │ │ │ ├── __init__.py │ │ │ ├── test_base.py │ │ │ └── test_utils.py │ │ ├── transform │ │ │ ├── test_correctness.py │ │ │ └── test_serialization.py │ │ └── utils │ │ │ └── test_hooks.py │ ├── observers │ │ ├── __init__.py │ │ ├── test_helpers.py │ │ ├── test_min_max.py │ │ └── test_mse.py │ ├── pipelines │ │ ├── sequential │ │ │ ├── ast_utils.py │ │ │ │ └── test_auto_wrapper.py │ │ │ └── test_helpers.py │ │ ├── test_cache.py │ │ └── test_model_free_ptq.py │ ├── pytorch │ │ ├── __init__.py │ │ ├── helpers.py │ │ ├── modifiers │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── logarithmic_equalization │ │ │ │ ├── __init__.py │ │ │ │ └── test_pytorch.py │ │ │ ├── pruning │ │ │ │ ├── __init__.py │ │ │ │ ├── constant │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_pytorch.py │ │ │ │ ├── sparsegpt │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_pytorch.py │ │ │ │ └── wanda │ │ │ │ │ └── test_pytorch.py │ │ │ └── smoothquant │ │ │ │ ├── __init__.py │ │ │ │ └── test_pytorch.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── test_helpers.py │ │ │ └── test_sparse.py │ ├── recipe │ │ ├── __init__.py │ │ └── test_recipe.py │ ├── test_sentinel.py │ ├── transformers │ │ ├── __init__.py │ │ ├── autoround │ │ │ └── test_autoround_oneshot.py │ │ ├── compression │ │ │ ├── __init__.py │ │ │ ├── configs │ │ │ │ ├── actorder_group_1.1b.yaml │ │ │ │ ├── actorder_weight_1.1b.yaml │ │ │ │ ├── channelwise_1.1b.yaml │ │ │ │ ├── channelwise_smoke.yaml │ │ │ │ ├── fp8_1.1b.yaml │ │ │ │ ├── fp8_smoke.yaml │ │ │ │ ├── group_1.1b.yaml │ │ │ │ ├── inputs_1.1b.yaml │ │ │ │ ├── inputs_smoke.yaml │ │ │ │ ├── weights_only_1.1b.yaml │ │ │ │ └── weights_only_smoke.yaml │ │ │ ├── decompression_configs │ │ │ │ ├── fp8_dynamic.yaml │ │ │ │ ├── w4a16.yaml │ │ │ │ ├── w8a16_dense.yaml │ │ │ │ └── w8a8.yaml │ │ │ ├── recipes │ │ │ │ ├── new_quant_actorder_group.yaml │ │ │ │ ├── new_quant_actorder_weight.yaml │ │ │ │ ├── new_quant_channel.yaml │ │ │ │ ├── new_quant_fp8.yaml │ │ │ │ ├── new_quant_full.yaml │ │ │ │ ├── new_quant_group.yaml │ │ │ │ ├── new_quant_simple.yaml │ │ │ │ ├── new_quant_weight.yaml │ │ │ │ ├── smoothquant_gptq_w8a8.yaml │ │ │ │ ├── sparse_24.yaml │ │ │ │ └── sparse_24_fp8.yaml │ │ │ ├── run_compressed_configs │ │ │ │ ├── fp8_dynamic.yaml │ │ │ │ ├── w4a16.yaml │ │ │ │ ├── w8a16.yaml │ │ │ │ └── w8a8.yaml │ │ │ ├── test_compress_tensor_utils.py │ │ │ ├── test_decompress.py │ │ │ ├── test_has_gpu.py │ │ │ ├── test_helpers.py │ │ │ ├── test_quantization.py │ │ │ ├── test_recipe_parsing.py │ │ │ ├── test_run_compressed.py │ │ │ └── test_sparsity_metadata_config.py │ │ ├── conftest.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_dataset_helpers.py │ │ │ ├── test_dataset_loading.py │ │ │ └── test_registry.py │ │ ├── gptq │ │ │ └── test_gptq_oneshot.py │ │ ├── kv_cache │ │ │ └── test_kv_cache.py │ │ ├── oneshot │ │ │ ├── __init__.py │ │ │ ├── dataset_processing.py │ │ │ ├── oneshot_configs │ │ │ │ ├── recipes │ │ │ │ │ └── recipe.yaml │ │ │ │ ├── tiny_smoke_conf1.yaml │ │ │ │ ├── tiny_smoke_conf2.yaml │ │ │ │ ├── tiny_smoke_conf3.yaml │ │ │ │ ├── tiny_smoke_conf4.yaml │ │ │ │ ├── tiny_smoke_conf5.yaml │ │ │ │ └── tiny_smoke_conf6.yaml │ │ │ └── test_api_inputs.py │ │ ├── sparsegpt │ │ │ ├── __init__.py │ │ │ ├── recipes │ │ │ │ ├── additional_sparsity.yaml │ │ │ │ ├── additional_sparsity_with_quant.yaml │ │ │ │ ├── quant.yaml │ │ │ │ ├── quant_and_sparse.yaml │ │ │ │ ├── sparse.yaml │ │ │ │ ├── sparse_with_mask_structure.yaml │ │ │ │ └── test_tiny2.yaml │ │ │ ├── sparsegpt_configs │ │ │ │ ├── completion │ │ │ │ │ ├── gpu │ │ │ │ │ │ ├── llama_7b_quant.yaml │ │ │ │ │ │ ├── llama_7b_quant_and_sparse.yaml │ │ │ │ │ │ └── llama_7b_sparse.yml │ │ │ │ │ ├── tiny_llama_quant.yaml │ │ │ │ │ └── tiny_llama_quant_and_sparse.yaml │ │ │ │ ├── consec_runs │ │ │ │ │ ├── gpu │ │ │ │ │ │ └── llama_consec_runs.yaml │ │ │ │ │ └── tiny_llama_consec_runs.yaml │ │ │ │ ├── mask_structure │ │ │ │ │ └── tiny_llama_mask_structure_preservation.yaml │ │ │ │ ├── sparse │ │ │ │ │ ├── gpu │ │ │ │ │ │ └── llama_7b_sparse.yaml │ │ │ │ │ └── tiny_llama_sparse.yaml │ │ │ │ └── sparsity_generic │ │ │ │ │ └── config.yaml │ │ │ ├── test_consecutive_runs.py │ │ │ ├── test_mask_structure_preservation.py │ │ │ ├── test_oneshot_with_modifier.py │ │ │ ├── test_sparsegpt_completion.py │ │ │ ├── test_sparsegpt_infer_targets.py │ │ │ ├── test_sparsegpt_lm_head.py │ │ │ ├── test_sparsegpt_owl.py │ │ │ └── test_sparsegpt_sparsity.py │ │ └── tracing │ │ │ └── test_models.py │ └── utils │ │ ├── __init__.py │ │ ├── pytorch │ │ ├── __init__.py │ │ └── test_module.py │ │ ├── test_helpers.py │ │ └── test_transformers.py ├── lmeval │ ├── __init__.py │ ├── configs │ │ ├── fp8_dynamic_per_token.yaml │ │ ├── fp8_static_per_tensor.yaml │ │ ├── int8_w8a8_dynamic_per_token.yaml │ │ ├── vl_fp8_dynamic_per_token.yaml │ │ ├── vl_int8_w8a8_dynamic_per_token.yaml │ │ ├── vl_w4a16_actorder_weight.yaml │ │ ├── w4a16_actorder_group.yaml │ │ ├── w4a16_actorder_none.yaml │ │ ├── w4a16_actorder_weight.yaml │ │ ├── w4a16_awq_sym.yaml │ │ ├── w4a16_grouped_quant.yaml │ │ └── w4a4_nvfp4.yaml │ └── test_lmeval.py ├── test_timer │ ├── __init__.py │ ├── timer.py │ └── timer_utils.py ├── testing_utils.py └── unit │ ├── __init__.py │ ├── core │ ├── __init__.py │ ├── events │ │ ├── __init__.py │ │ └── test_event.py │ └── test_state.py │ └── test_logger.py └── tools └── collect_env.py /.MAINTAINERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.MAINTAINERS -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | patch = subprocess 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/ISSUE_TEMPLATE/bug_report.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/doc-edit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/ISSUE_TEMPLATE/doc-edit.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/TODO.txt: -------------------------------------------------------------------------------- 1 | TODO: update for upstream push -------------------------------------------------------------------------------- /.github/actions/prepare-code-coverage/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/actions/prepare-code-coverage/action.yml -------------------------------------------------------------------------------- /.github/workflows/build-and-publish-release-images.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/build-and-publish-release-images.yaml -------------------------------------------------------------------------------- /.github/workflows/linkcheck.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/linkcheck.yml -------------------------------------------------------------------------------- /.github/workflows/linkspector/linkspector.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/linkspector/linkspector.yml -------------------------------------------------------------------------------- /.github/workflows/quality-check.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/quality-check.yaml -------------------------------------------------------------------------------- /.github/workflows/ready-label-check.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/ready-label-check.yaml -------------------------------------------------------------------------------- /.github/workflows/result.xml.fail: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/result.xml.fail -------------------------------------------------------------------------------- /.github/workflows/result.xml.success: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/result.xml.success -------------------------------------------------------------------------------- /.github/workflows/set-comment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/set-comment.yaml -------------------------------------------------------------------------------- /.github/workflows/test-check-transformers.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/test-check-transformers.yaml -------------------------------------------------------------------------------- /.github/workflows/test-check.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.github/workflows/test-check.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.gitignore -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/CITATION.cff -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /DEVELOPING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/DEVELOPING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/Makefile -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/NOTICE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/README.md -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/assets/llmcompressor-icon-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/assets/llmcompressor-icon-white.png -------------------------------------------------------------------------------- /docs/assets/llmcompressor-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/assets/llmcompressor-icon.png -------------------------------------------------------------------------------- /docs/assets/llmcompressor-user-flows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/assets/llmcompressor-user-flows.png -------------------------------------------------------------------------------- /docs/developer/code-of-conduct.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/developer/code-of-conduct.md -------------------------------------------------------------------------------- /docs/developer/contributing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/developer/contributing.md -------------------------------------------------------------------------------- /docs/developer/developing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/developer/developing.md -------------------------------------------------------------------------------- /docs/developer/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/developer/index.md -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/examples/index.md -------------------------------------------------------------------------------- /docs/getting-started/compress.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/getting-started/compress.md -------------------------------------------------------------------------------- /docs/getting-started/deploy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/getting-started/deploy.md -------------------------------------------------------------------------------- /docs/getting-started/faq.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/getting-started/faq.md -------------------------------------------------------------------------------- /docs/getting-started/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/getting-started/index.md -------------------------------------------------------------------------------- /docs/getting-started/install.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/getting-started/install.md -------------------------------------------------------------------------------- /docs/guides/compression_formats.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/guides/compression_formats.md -------------------------------------------------------------------------------- /docs/guides/compression_schemes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/guides/compression_schemes.md -------------------------------------------------------------------------------- /docs/guides/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/guides/index.md -------------------------------------------------------------------------------- /docs/guides/saving_a_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/guides/saving_a_model.md -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/index.md -------------------------------------------------------------------------------- /docs/observers.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/observers.md -------------------------------------------------------------------------------- /docs/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/scripts/gen_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/scripts/gen_files.py -------------------------------------------------------------------------------- /docs/scripts/mathjax.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/docs/scripts/mathjax.js -------------------------------------------------------------------------------- /docs/stylesheets/style.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/autoround/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/autoround/README.md -------------------------------------------------------------------------------- /examples/autoround/llama3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/autoround/llama3_example.py -------------------------------------------------------------------------------- /examples/awq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/awq/README.md -------------------------------------------------------------------------------- /examples/awq/llama_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/awq/llama_example.py -------------------------------------------------------------------------------- /examples/awq/qwen3-vl-30b-a3b-Instruct-example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/awq/qwen3-vl-30b-a3b-Instruct-example.py -------------------------------------------------------------------------------- /examples/awq/qwen3_coder_moe_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/awq/qwen3_coder_moe_example.py -------------------------------------------------------------------------------- /examples/awq/qwen3_moe_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/awq/qwen3_moe_example.py -------------------------------------------------------------------------------- /examples/big_models_with_sequential_onloading/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/big_models_with_sequential_onloading/README.md -------------------------------------------------------------------------------- /examples/big_models_with_sequential_onloading/assets/sequential_onloading.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/big_models_with_sequential_onloading/assets/sequential_onloading.png -------------------------------------------------------------------------------- /examples/big_models_with_sequential_onloading/llama3.3_70b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/big_models_with_sequential_onloading/llama3.3_70b.py -------------------------------------------------------------------------------- /examples/compressed_inference/fp8_compressed_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/compressed_inference/fp8_compressed_inference.py -------------------------------------------------------------------------------- /examples/finetuning/configure_fsdp.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/finetuning/configure_fsdp.md -------------------------------------------------------------------------------- /examples/finetuning/example_alternating_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/finetuning/example_alternating_recipe.yaml -------------------------------------------------------------------------------- /examples/finetuning/example_fsdp_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/finetuning/example_fsdp_config.yaml -------------------------------------------------------------------------------- /examples/finetuning/example_single_gpu_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/finetuning/example_single_gpu_config.yaml -------------------------------------------------------------------------------- /examples/model_free_ptq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/model_free_ptq/README.md -------------------------------------------------------------------------------- /examples/model_free_ptq/kimi_k2_thinking_fp8_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/model_free_ptq/kimi_k2_thinking_fp8_block.py -------------------------------------------------------------------------------- /examples/multimodal_audio/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_audio/README.md -------------------------------------------------------------------------------- /examples/multimodal_audio/whisper_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_audio/whisper_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/README.md -------------------------------------------------------------------------------- /examples/multimodal_vision/README_internvl3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/README_internvl3.md -------------------------------------------------------------------------------- /examples/multimodal_vision/gemma3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/gemma3_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/idefics3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/idefics3_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/internvl3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/internvl3_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/llama4_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/llama4_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/llava_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/llava_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/mistral3_chat_template.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/mistral3_chat_template.json -------------------------------------------------------------------------------- /examples/multimodal_vision/mistral3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/mistral3_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/mllama_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/mllama_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/phi3_vision_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/phi3_vision_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/pixtral_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/pixtral_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/qwen2_vl_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/qwen2_vl_example.py -------------------------------------------------------------------------------- /examples/multimodal_vision/qwen_2_5_vl_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/multimodal_vision/qwen_2_5_vl_example.py -------------------------------------------------------------------------------- /examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_group-128_recipe.yaml -------------------------------------------------------------------------------- /examples/quantization_2of4_sparse_w4a16/2of4_w4a16_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_2of4_sparse_w4a16/2of4_w4a16_recipe.yaml -------------------------------------------------------------------------------- /examples/quantization_2of4_sparse_w4a16/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_2of4_sparse_w4a16/README.md -------------------------------------------------------------------------------- /examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py -------------------------------------------------------------------------------- /examples/quantization_kv_cache/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_kv_cache/README.md -------------------------------------------------------------------------------- /examples/quantization_kv_cache/gemma2_fp8_kv_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_kv_cache/gemma2_fp8_kv_example.py -------------------------------------------------------------------------------- /examples/quantization_kv_cache/llama3_fp8_kv_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_kv_cache/llama3_fp8_kv_example.py -------------------------------------------------------------------------------- /examples/quantization_kv_cache/phi3.5_fp8_kv_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_kv_cache/phi3.5_fp8_kv_example.py -------------------------------------------------------------------------------- /examples/quantization_non_uniform/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_non_uniform/README.md -------------------------------------------------------------------------------- /examples/quantization_non_uniform/quantization_fp8_multiple_strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_non_uniform/quantization_fp8_multiple_strategies.py -------------------------------------------------------------------------------- /examples/quantization_non_uniform/quantization_int4_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_non_uniform/quantization_int4_int8.py -------------------------------------------------------------------------------- /examples/quantization_non_uniform/quantization_multiple_modifiers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_non_uniform/quantization_multiple_modifiers.py -------------------------------------------------------------------------------- /examples/quantization_non_uniform/quantization_nvfp4_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_non_uniform/quantization_nvfp4_fp8.py -------------------------------------------------------------------------------- /examples/quantization_w4a16/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a16/README.md -------------------------------------------------------------------------------- /examples/quantization_w4a16/llama3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a16/llama3_example.py -------------------------------------------------------------------------------- /examples/quantization_w4a16_fp4/llama3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a16_fp4/llama3_example.py -------------------------------------------------------------------------------- /examples/quantization_w4a16_fp4/qwen3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a16_fp4/qwen3_example.py -------------------------------------------------------------------------------- /examples/quantization_w4a4_fp4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a4_fp4/README.md -------------------------------------------------------------------------------- /examples/quantization_w4a4_fp4/llama3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a4_fp4/llama3_example.py -------------------------------------------------------------------------------- /examples/quantization_w4a4_fp4/llama4_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a4_fp4/llama4_example.py -------------------------------------------------------------------------------- /examples/quantization_w4a4_fp4/qwen3_next_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a4_fp4/qwen3_next_example.py -------------------------------------------------------------------------------- /examples/quantization_w4a4_fp4/qwen3_vl_moe_w4a4_fp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a4_fp4/qwen3_vl_moe_w4a4_fp4.py -------------------------------------------------------------------------------- /examples/quantization_w4a4_fp4/qwen_30b_a3b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w4a4_fp4/qwen_30b_a3b.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/README.md -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/README_granite4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/README_granite4.md -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/fp8_block_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/fp8_block_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/gemma2_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/gemma2_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/granite4_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/granite4_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/llama3.2_vision_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/llama3.2_vision_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/llama3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/llama3_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/llama4_fp8_block_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/llama4_fp8_block_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/llava1.5_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/llava1.5_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/qwen2vl_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/qwen2vl_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/qwen3_next_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/qwen3_next_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/qwen3_vl_moe_fp8_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/qwen3_vl_moe_fp8_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/qwen_2_5_vl_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/qwen_2_5_vl_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_fp8/whisper_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_fp8/whisper_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_int8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_int8/README.md -------------------------------------------------------------------------------- /examples/quantization_w8a8_int8/gemma2_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_int8/gemma2_example.py -------------------------------------------------------------------------------- /examples/quantization_w8a8_int8/llama3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantization_w8a8_int8/llama3_example.py -------------------------------------------------------------------------------- /examples/quantizing_moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantizing_moe/README.md -------------------------------------------------------------------------------- /examples/quantizing_moe/deepseek_r1_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantizing_moe/deepseek_r1_example.py -------------------------------------------------------------------------------- /examples/quantizing_moe/mixtral_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantizing_moe/mixtral_example.py -------------------------------------------------------------------------------- /examples/quantizing_moe/qwen_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/quantizing_moe/qwen_example.py -------------------------------------------------------------------------------- /examples/sparse_2of4_quantization_fp8/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/sparse_2of4_quantization_fp8/README.md -------------------------------------------------------------------------------- /examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py -------------------------------------------------------------------------------- /examples/transform/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/transform/README.md -------------------------------------------------------------------------------- /examples/transform/quip_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/transform/quip_example.py -------------------------------------------------------------------------------- /examples/transform/spinquant_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/examples/transform/spinquant_example.py -------------------------------------------------------------------------------- /experimental/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/experimental/README.md -------------------------------------------------------------------------------- /experimental/llama3_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/experimental/llama3_attention.py -------------------------------------------------------------------------------- /experimental/mistral/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/experimental/mistral/README.md -------------------------------------------------------------------------------- /experimental/mistral/fp8_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/experimental/mistral/fp8_quantize.py -------------------------------------------------------------------------------- /experimental/mxfp4/llama3_mxfp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/experimental/mxfp4/llama3_mxfp4.py -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/setup.py -------------------------------------------------------------------------------- /src/llmcompressor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/args/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/args/README.md -------------------------------------------------------------------------------- /src/llmcompressor/args/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/args/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/args/dataset_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/args/dataset_arguments.py -------------------------------------------------------------------------------- /src/llmcompressor/args/model_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/args/model_arguments.py -------------------------------------------------------------------------------- /src/llmcompressor/args/recipe_arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/args/recipe_arguments.py -------------------------------------------------------------------------------- /src/llmcompressor/args/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/args/utils.py -------------------------------------------------------------------------------- /src/llmcompressor/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/core/events/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/events/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/core/events/event.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/events/event.py -------------------------------------------------------------------------------- /src/llmcompressor/core/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/core/lifecycle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/lifecycle.py -------------------------------------------------------------------------------- /src/llmcompressor/core/model_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/model_layer.py -------------------------------------------------------------------------------- /src/llmcompressor/core/session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/session.py -------------------------------------------------------------------------------- /src/llmcompressor/core/session_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/session_functions.py -------------------------------------------------------------------------------- /src/llmcompressor/core/state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/core/state.py -------------------------------------------------------------------------------- /src/llmcompressor/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/datasets/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/datasets/utils.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/README.md -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/model_free/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/model_free/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/model_free/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/model_free/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/model_free/lifecycle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/model_free/lifecycle.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/model_free/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/model_free/model_utils.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/model_free/save_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/model_free/save_utils.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/oneshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/oneshot.py -------------------------------------------------------------------------------- /src/llmcompressor/entrypoints/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/entrypoints/utils.py -------------------------------------------------------------------------------- /src/llmcompressor/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/logger.py -------------------------------------------------------------------------------- /src/llmcompressor/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/metrics/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/metrics/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/metrics/logger.py -------------------------------------------------------------------------------- /src/llmcompressor/metrics/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .frequency_manager import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/metrics/utils/frequency_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/metrics/utils/frequency_manager.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/deepseek_v3.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/fuse.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/granite4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/granite4.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/llama4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/llama4.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/moe_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/moe_context.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/prepare.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/qwen3_moe.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/qwen3_next_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/qwen3_next_moe.py -------------------------------------------------------------------------------- /src/llmcompressor/modeling/qwen3_vl_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modeling/qwen3_vl_moe.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/README.md -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/autoround/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/autoround/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/autoround/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/awq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/awq/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/awq/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/awq/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/awq/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/awq/mappings.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/factory.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/interface.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/logarithmic_equalization/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/logarithmic_equalization/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/logarithmic_equalization/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/modifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/modifier.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/obcq/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .sgpt_base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/obcq/sgpt_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/obcq/sgpt_base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/constant/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import ConstantPruningModifier 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/constant/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/constant/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/magnitude/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import MagnitudePruningModifier 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/magnitude/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/magnitude/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/sparsegpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/sparsegpt/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/sparsegpt/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/sparsegpt/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_sparsify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/sparsegpt/sgpt_sparsify.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/utils/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/utils/pytorch/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/utils/pytorch/layer_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/utils/pytorch/layer_mask.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/utils/pytorch/mask_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/utils/pytorch/mask_factory.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/wanda/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/wanda/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/wanda/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/pruning/wanda/wanda_sparsify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/pruning/wanda/wanda_sparsify.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/calibration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/calibration.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/gptq/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/gptq/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/gptq/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/quantization/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/quantization/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/quantization/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/quantization/quantization/mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/quantization/quantization/mixin.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/smoothquant/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/smoothquant/README.md -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/smoothquant/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/smoothquant/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/smoothquant/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/smoothquant/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/smoothquant/utils.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/transform/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/quip/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/quip/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/transform/quip/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/spinquant/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .base import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/spinquant/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/transform/spinquant/base.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/spinquant/mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/transform/spinquant/mappings.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/transform/spinquant/norm_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/transform/spinquant/norm_mappings.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/utils/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/utils/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/utils/constants.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/utils/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/utils/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/utils/hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/utils/hooks.py -------------------------------------------------------------------------------- /src/llmcompressor/modifiers/utils/pytorch_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/modifiers/utils/pytorch_helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/observers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/observers/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/observers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/observers/base.py -------------------------------------------------------------------------------- /src/llmcompressor/observers/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/observers/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/observers/min_max.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/observers/min_max.py -------------------------------------------------------------------------------- /src/llmcompressor/observers/moving_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/observers/moving_base.py -------------------------------------------------------------------------------- /src/llmcompressor/observers/mse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/observers/mse.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/basic/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | from .pipeline import * 3 | -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/basic/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/basic/pipeline.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/cache.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/data_free/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | from .pipeline import * 3 | -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/data_free/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/data_free/pipeline.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/independent/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | from .pipeline import * 3 | -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/independent/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/independent/pipeline.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/registry.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/README.md -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/ast_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/ast_helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/ast_utils/auto_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/ast_utils/auto_wrapper.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/ast_utils/control_flow_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/ast_utils/control_flow_analyzer.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/ast_utils/name_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/ast_utils/name_analyzer.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/pipeline.py -------------------------------------------------------------------------------- /src/llmcompressor/pipelines/sequential/transformers_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pipelines/sequential/transformers_helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/model_load/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/model_load/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/model_load/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/utils/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/utils/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/sparsification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/utils/sparsification.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/sparsification_info/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/sparsification_info/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/utils/sparsification_info/configs.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/sparsification_info/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/utils/sparsification_info/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/pytorch/utils/sparsification_info/module_sparsification_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/pytorch/utils/sparsification_info/module_sparsification_info.py -------------------------------------------------------------------------------- /src/llmcompressor/recipe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/recipe/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/recipe/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/recipe/metadata.py -------------------------------------------------------------------------------- /src/llmcompressor/recipe/recipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/recipe/recipe.py -------------------------------------------------------------------------------- /src/llmcompressor/recipe/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/recipe/utils.py -------------------------------------------------------------------------------- /src/llmcompressor/sentinel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/sentinel.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/compression/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llmcompressor/transformers/compression/compressed_tensors_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/compression/compressed_tensors_utils.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/compression/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/compression/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/compression/sparsity_metadata_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/compression/sparsity_metadata_config.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/base.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/c4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/c4.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/cnn_dailymail.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/cnn_dailymail.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/custom.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/data_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/data_helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/evolcodealpaca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/evolcodealpaca.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/flickr_30k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/flickr_30k.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/gsm8k.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/open_platypus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/open_platypus.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/peoples_speech.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/peoples_speech.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/ultrachat_200k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/ultrachat_200k.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/data/wikitext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/data/wikitext.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/tracing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/tracing/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/tracing/debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/tracing/debug.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/utils/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/utils/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/utils/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/transformers/utils/preprocessing_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/transformers/utils/preprocessing_functions.py -------------------------------------------------------------------------------- /src/llmcompressor/typing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/typing.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/__init__.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/dev.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/fsdp/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | -------------------------------------------------------------------------------- /src/llmcompressor/utils/fsdp/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/fsdp/context.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/fsdp/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/fsdp/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/helpers.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/metric_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/metric_logging.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | 3 | from .module import * 4 | -------------------------------------------------------------------------------- /src/llmcompressor/utils/pytorch/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/pytorch/module.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/pytorch/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/pytorch/utils.py -------------------------------------------------------------------------------- /src/llmcompressor/utils/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/src/llmcompressor/utils/transformers.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/e2e_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/e2e_utils.py -------------------------------------------------------------------------------- /tests/e2e/vLLM/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp4_nvfp4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp4_nvfp4.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp4_nvfp4a16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp4_nvfp4a16.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp8_block.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp8_block.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp8_static_per_tensor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp8_static_per_tensor.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp8_weight_only_channel.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp8_weight_only_channel.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/fp8_weight_only_tensor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/fp8_weight_only_tensor.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/int8_channel_weight_static_per_tensor_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/int8_channel_weight_static_per_tensor_act.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/int8_tensor_weight_static_per_tensor_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/int8_tensor_weight_static_per_tensor_act.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/kv_cache_gptq_tinyllama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/kv_cache_gptq_tinyllama.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/kv_cache_phi3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/kv_cache_phi3.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/kv_cache_tinyllama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/kv_cache_tinyllama.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/qwen3_fp4_nvfp4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/qwen3_fp4_nvfp4.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/qwen3_fp8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/qwen3_fp8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/sparse2of4_fp8_dynamic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/sparse2of4_fp8_dynamic.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/sparse_24.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/sparse_24.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_2of4_channel_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_2of4_channel_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_2of4_grouped_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_2of4_grouped_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_actorder_group.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_actorder_group.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_actorder_none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_actorder_none.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_actorder_weight.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_actorder_weight.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_channel_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_channel_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_grouped_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_grouped_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_grouped_quant_asym_awq.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w4a16_grouped_quant_sym_awq.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w8a16_channel_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w8a16_channel_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w8a16_grouped_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w8a16_grouped_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w8a8_dynamic_asym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w8a8_dynamic_asym.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/configs/w8a8_static_asym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/configs/w8a8_static_asym.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/FP8/recipe_fp8_weight_only_channel.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/FP8/recipe_fp8_weight_only_channel.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/FP8/recipe_fp8_weight_only_per_tensor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/FP8/recipe_fp8_weight_only_per_tensor.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_static_per_tensor_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_static_per_tensor_act.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/INT8/recipe_int8_tensor_weight_static_per_tensor_act.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/INT8/recipe_int8_tensor_weight_static_per_tensor_act.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/INT8/recipe_w8a8_dynamic_asym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/INT8/recipe_w8a8_dynamic_asym.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/INT8/recipe_w8a8_static_asym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/INT8/recipe_w8a8_static_asym.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4_fp8_dynamic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4_fp8_dynamic.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_asym.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_awq_sym.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/WNA16/recipe_w4a16_channel_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/WNA16/recipe_w8a16_channel_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/WNA16/recipe_w8a16_channel_quant.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_group-128_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_group-128_recipe.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/WNA16_2of4/2of4_w4a16_recipe.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_none.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/kv_cache/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/kv_cache/default.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/recipes/kv_cache/gptq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/recipes/kv_cache/gptq.yaml -------------------------------------------------------------------------------- /tests/e2e/vLLM/rhaiis-e2e-smoke.list: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/rhaiis-e2e-smoke.list -------------------------------------------------------------------------------- /tests/e2e/vLLM/run_tests_in_python.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/run_tests_in_python.sh -------------------------------------------------------------------------------- /tests/e2e/vLLM/run_tests_in_rhaiis.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/run_tests_in_rhaiis.sh -------------------------------------------------------------------------------- /tests/e2e/vLLM/run_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/run_vllm.py -------------------------------------------------------------------------------- /tests/e2e/vLLM/test_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/e2e/vLLM/test_vllm.py -------------------------------------------------------------------------------- /tests/examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/examples/test_example_scripts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/examples/test_example_scripts.py -------------------------------------------------------------------------------- /tests/examples/test_readmes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/examples/test_readmes.py -------------------------------------------------------------------------------- /tests/llmcompressor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/conftest.py -------------------------------------------------------------------------------- /tests/llmcompressor/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/metrics/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/metrics/test_logger.py -------------------------------------------------------------------------------- /tests/llmcompressor/metrics/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/metrics/utils/test_frequency_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/metrics/utils/test_frequency_manager.py -------------------------------------------------------------------------------- /tests/llmcompressor/modeling/test_calib_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modeling/test_calib_deepseek_v3.py -------------------------------------------------------------------------------- /tests/llmcompressor/modeling/test_calib_llama4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modeling/test_calib_llama4.py -------------------------------------------------------------------------------- /tests/llmcompressor/modeling/test_calib_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modeling/test_calib_qwen3.py -------------------------------------------------------------------------------- /tests/llmcompressor/modeling/test_calib_qwen3_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modeling/test_calib_qwen3_next.py -------------------------------------------------------------------------------- /tests/llmcompressor/modeling/test_calib_qwen3_vl_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modeling/test_calib_qwen3_vl_moe.py -------------------------------------------------------------------------------- /tests/llmcompressor/modeling/test_fuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modeling/test_fuse.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/awq/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/awq/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/awq/test_base.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/calibration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/calibration/test_frozen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/calibration/test_frozen.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/calibration/test_lifecycle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/calibration/test_lifecycle.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/calibration/test_observers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/calibration/test_observers.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/conf.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/logarithmic_equalization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/logarithmic_equalization/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/logarithmic_equalization/test_base.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/pruning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/pruning/sparsegpt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/pruning/sparsegpt/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/pruning/sparsegpt/test_base.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/pruning/wanda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/pruning/wanda/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/pruning/wanda/test_base.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/quantization/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/quantization/test_base.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/quantization/test_handling_shared_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/quantization/test_handling_shared_embeddings.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/smoothquant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/smoothquant/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/smoothquant/test_base.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/smoothquant/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/smoothquant/test_utils.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/transform/test_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/transform/test_correctness.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/transform/test_serialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/transform/test_serialization.py -------------------------------------------------------------------------------- /tests/llmcompressor/modifiers/utils/test_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/modifiers/utils/test_hooks.py -------------------------------------------------------------------------------- /tests/llmcompressor/observers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/observers/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/observers/test_helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/observers/test_min_max.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/observers/test_min_max.py -------------------------------------------------------------------------------- /tests/llmcompressor/observers/test_mse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/observers/test_mse.py -------------------------------------------------------------------------------- /tests/llmcompressor/pipelines/sequential/ast_utils.py/test_auto_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pipelines/sequential/ast_utils.py/test_auto_wrapper.py -------------------------------------------------------------------------------- /tests/llmcompressor/pipelines/sequential/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pipelines/sequential/test_helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/pipelines/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pipelines/test_cache.py -------------------------------------------------------------------------------- /tests/llmcompressor/pipelines/test_model_free_ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pipelines/test_model_free_ptq.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/modifiers/conftest.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/logarithmic_equalization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/logarithmic_equalization/test_pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/modifiers/logarithmic_equalization/test_pytorch.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/pruning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/pruning/constant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/pruning/constant/test_pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/modifiers/pruning/constant/test_pytorch.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/modifiers/pruning/sparsegpt/test_pytorch.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/pruning/wanda/test_pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/modifiers/pruning/wanda/test_pytorch.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/smoothquant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/modifiers/smoothquant/test_pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/modifiers/smoothquant/test_pytorch.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa 2 | -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/utils/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/utils/test_helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/pytorch/utils/test_sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/pytorch/utils/test_sparse.py -------------------------------------------------------------------------------- /tests/llmcompressor/recipe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/recipe/test_recipe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/recipe/test_recipe.py -------------------------------------------------------------------------------- /tests/llmcompressor/test_sentinel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/test_sentinel.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/autoround/test_autoround_oneshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/autoround/test_autoround_oneshot.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/actorder_group_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/actorder_group_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/actorder_weight_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/actorder_weight_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/channelwise_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/channelwise_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/channelwise_smoke.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/channelwise_smoke.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/fp8_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/fp8_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/fp8_smoke.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/fp8_smoke.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/group_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/group_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/inputs_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/inputs_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/inputs_smoke.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/inputs_smoke.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/weights_only_1.1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/weights_only_1.1b.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/configs/weights_only_smoke.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/configs/weights_only_smoke.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/decompression_configs/fp8_dynamic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/decompression_configs/fp8_dynamic.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/decompression_configs/w4a16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/decompression_configs/w4a16.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/decompression_configs/w8a16_dense.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/decompression_configs/w8a16_dense.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/decompression_configs/w8a8.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_actorder_group.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_actorder_group.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_actorder_weight.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_actorder_weight.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_channel.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_fp8.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_full.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_group.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_group.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/new_quant_weight.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/smoothquant_gptq_w8a8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/smoothquant_gptq_w8a8.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/sparse_24.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/sparse_24.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/run_compressed_configs/fp8_dynamic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/run_compressed_configs/fp8_dynamic.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/run_compressed_configs/w4a16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/run_compressed_configs/w4a16.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a16.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/run_compressed_configs/w8a8.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_compress_tensor_utils.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_decompress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_decompress.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_has_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_has_gpu.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_quantization.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_recipe_parsing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_recipe_parsing.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_run_compressed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_run_compressed.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/compression/test_sparsity_metadata_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/compression/test_sparsity_metadata_config.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/conftest.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/data/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/data/conftest.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/data/test_dataset_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/data/test_dataset_helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/data/test_dataset_loading.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/data/test_dataset_loading.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/data/test_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/data/test_registry.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/gptq/test_gptq_oneshot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/gptq/test_gptq_oneshot.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/kv_cache/test_kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/dataset_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/dataset_processing.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/recipes/recipe.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf1.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf2.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf3.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf4.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf5.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/oneshot_configs/tiny_smoke_conf6.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/oneshot/test_api_inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/oneshot/test_api_inputs.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/additional_sparsity.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/additional_sparsity.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/additional_sparsity_with_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/additional_sparsity_with_quant.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/quant.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/quant_and_sparse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/quant_and_sparse.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/sparse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/sparse.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/sparse_with_mask_structure.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/sparse_with_mask_structure.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/recipes/test_tiny2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/recipes/test_tiny2.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/gpu/llama_7b_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/gpu/llama_7b_quant.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/gpu/llama_7b_quant_and_sparse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/gpu/llama_7b_quant_and_sparse.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/gpu/llama_7b_sparse.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/gpu/llama_7b_sparse.yml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/tiny_llama_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/tiny_llama_quant.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/tiny_llama_quant_and_sparse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/completion/tiny_llama_quant_and_sparse.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/consec_runs/gpu/llama_consec_runs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/consec_runs/gpu/llama_consec_runs.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/consec_runs/tiny_llama_consec_runs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/consec_runs/tiny_llama_consec_runs.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/mask_structure/tiny_llama_mask_structure_preservation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/mask_structure/tiny_llama_mask_structure_preservation.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/sparse/gpu/llama_7b_sparse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/sparse/gpu/llama_7b_sparse.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/sparse/tiny_llama_sparse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/sparse/tiny_llama_sparse.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/sparsity_generic/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/sparsegpt_configs/sparsity_generic/config.yaml -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_consecutive_runs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_consecutive_runs.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_mask_structure_preservation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_mask_structure_preservation.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_oneshot_with_modifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_oneshot_with_modifier.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_completion.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_infer_targets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_infer_targets.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_lm_head.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_owl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_owl.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/sparsegpt/test_sparsegpt_sparsity.py -------------------------------------------------------------------------------- /tests/llmcompressor/transformers/tracing/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/transformers/tracing/test_models.py -------------------------------------------------------------------------------- /tests/llmcompressor/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/utils/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/llmcompressor/utils/pytorch/test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/utils/pytorch/test_module.py -------------------------------------------------------------------------------- /tests/llmcompressor/utils/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/utils/test_helpers.py -------------------------------------------------------------------------------- /tests/llmcompressor/utils/test_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/llmcompressor/utils/test_transformers.py -------------------------------------------------------------------------------- /tests/lmeval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/lmeval/configs/fp8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/fp8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/fp8_static_per_tensor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/fp8_static_per_tensor.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/int8_w8a8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/int8_w8a8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/vl_w4a16_actorder_weight.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/vl_w4a16_actorder_weight.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/w4a16_actorder_group.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/w4a16_actorder_group.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/w4a16_actorder_none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/w4a16_actorder_none.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/w4a16_actorder_weight.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/w4a16_actorder_weight.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/w4a16_awq_sym.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/w4a16_awq_sym.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/w4a16_grouped_quant.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/w4a16_grouped_quant.yaml -------------------------------------------------------------------------------- /tests/lmeval/configs/w4a4_nvfp4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/configs/w4a4_nvfp4.yaml -------------------------------------------------------------------------------- /tests/lmeval/test_lmeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/lmeval/test_lmeval.py -------------------------------------------------------------------------------- /tests/test_timer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/test_timer/__init__.py -------------------------------------------------------------------------------- /tests/test_timer/timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/test_timer/timer.py -------------------------------------------------------------------------------- /tests/test_timer/timer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/test_timer/timer_utils.py -------------------------------------------------------------------------------- /tests/testing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/testing_utils.py -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/core/events/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/core/events/test_event.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/unit/core/events/test_event.py -------------------------------------------------------------------------------- /tests/unit/core/test_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/unit/core/test_state.py -------------------------------------------------------------------------------- /tests/unit/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tests/unit/test_logger.py -------------------------------------------------------------------------------- /tools/collect_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/llm-compressor/HEAD/tools/collect_env.py --------------------------------------------------------------------------------