├── .dockerignore ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── 1_bug_report.md │ ├── 2_feature_request.md │ ├── 3_question.md │ └── get_system_info.py ├── PULL_REQUEST_TEMPLATE.md ├── codecov.yml ├── copy-pr-bot.yaml └── workflows │ ├── _wait_for_checks.yml │ ├── close_inactive_issues_pr.yml │ ├── code_quality.yml │ ├── example_tests.yml │ ├── gpu_tests.yml │ ├── pages.yml │ └── unit_tests.yml ├── .gitignore ├── .gitlab ├── .gitlab-ci.yml ├── release.yml └── tests.yml ├── .markdownlint-cli2.yaml ├── .pre-commit-config.yaml ├── .vscode ├── extensions.json └── settings.json ├── CHANGELOG-Windows.rst ├── CHANGELOG.rst ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── LICENSE_HEADER ├── README.md ├── SECURITY.md ├── docs └── source │ ├── _ext │ └── modelopt_autodoc_pydantic.py │ ├── _static │ └── custom.css │ ├── _templates │ └── autosummary │ │ └── module.rst │ ├── assets │ └── model-optimizer-banner.png │ ├── conf.py │ ├── deployment │ ├── 1_tensorrt_llm.rst │ ├── 2_directml.rst │ └── 3_unified_hf.rst │ ├── examples │ └── 0_all_examples.rst │ ├── getting_started │ ├── 1_overview.rst │ ├── 2_installation.rst │ ├── _installation_for_Linux.rst │ └── windows │ │ ├── _installation_for_Windows.rst │ │ ├── _installation_standalone.rst │ │ └── _installation_with_olive.rst │ ├── guides │ ├── 0_support_matrix.rst │ ├── 1_quantization.rst │ ├── 2_save_load.rst │ ├── 3_pruning.rst │ ├── 4_distillation.rst │ ├── 5_speculative_decoding.rst │ ├── 6_sparsity.rst │ ├── 7_nas.rst │ ├── 8_autocast.rst │ ├── _basic_quantization.rst │ ├── _choosing_quant_methods.rst │ ├── _compress_quantized_models.rst │ ├── _customized_model_quantization.rst │ ├── _onnx_quantization.rst │ ├── _pytorch_quantization.rst │ └── windows_guides │ │ └── _ONNX_PTQ_guide.rst │ ├── index.rst │ ├── reference │ ├── 0_changelog.rst │ ├── 1_modelopt_api.rst │ ├── _changelog_for_Linux.rst │ └── _changelog_for_Windows.rst │ └── support │ ├── 1_contact.rst │ └── 2_faqs.rst ├── examples ├── benchmark.md ├── chained_optimizations │ ├── .gitignore │ ├── README.md │ ├── bert_prune_distill_quantize.py │ └── scripts │ │ ├── 1_prune.sh │ │ ├── 2_int8_quantize.sh │ │ └── 3_onnx_export.sh ├── cnn_qat │ ├── README.md │ ├── requirements.txt │ ├── torchvision_qat.py │ └── utils.py ├── deepseek │ ├── .gitignore │ ├── README.md │ ├── ds_kernel.py │ ├── ptq.py │ ├── quantize_fp8_to_nvfp4.sh │ └── quantize_to_nvfp4.py ├── diffusers │ ├── README.md │ ├── cache_diffusion │ │ ├── assets │ │ │ ├── SDXL_Cache_Diffusion_Img.png │ │ │ └── sdxl_cache.png │ │ ├── benchmarks.py │ │ ├── cache_diffusion │ │ │ ├── cachify.py │ │ │ ├── module.py │ │ │ └── utils.py │ │ ├── example.ipynb │ │ ├── pipeline │ │ │ ├── config.py │ │ │ ├── deploy.py │ │ │ ├── models │ │ │ │ ├── sd3.py │ │ │ │ └── sdxl.py │ │ │ └── utils.py │ │ └── requirements.txt │ ├── eval │ │ ├── main.py │ │ ├── metrics │ │ │ ├── imagereward.py │ │ │ └── multimodal.py │ │ ├── requirements.txt │ │ └── utils.py │ └── quantization │ │ ├── .gitignore │ │ ├── assets │ │ ├── xl_base-fp16-sakura.png │ │ ├── xl_base-fp16.png │ │ ├── xl_base-fp8-sakura.png │ │ └── xl_base-int8.png │ │ ├── build_sdxl_8bit_engine.sh │ │ ├── calib │ │ └── plugin_calib.py │ │ ├── config.py │ │ ├── diffusion_trt.py │ │ ├── onnx_utils │ │ ├── export.py │ │ └── fp8_onnx_graphsurgeon.py │ │ ├── quantize.py │ │ ├── requirements.txt │ │ └── utils.py ├── gpt-oss │ ├── README.md │ ├── configs │ │ ├── sft_full.yaml │ │ ├── sft_lora.yaml │ │ └── zero3.yaml │ ├── convert_oai_mxfp4_weight_only.py │ ├── qat-finetune-transformers.ipynb │ ├── requirements.txt │ ├── sft.py │ └── utils.py ├── llm_autodeploy │ ├── README.md │ ├── api_client.py │ ├── api_server.py │ ├── run_auto_quantize.py │ └── scripts │ │ └── run_auto_quant_and_deploy.sh ├── llm_distill │ ├── README.md │ ├── accelerate_config │ │ └── fsdp2.yaml │ ├── main.py │ └── requirements.txt ├── llm_eval │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── gen_model_answer.py │ ├── livecodebench.py │ ├── lm_eval_hf.py │ ├── lm_eval_tensorrt_llm.py │ ├── mmlu.py │ ├── modeling.py │ ├── quantization_utils.py │ ├── requirements.txt │ ├── run_fastchat.sh │ ├── run_livecodebench.sh │ ├── run_lm_eval_vllm.sh │ ├── run_simple_eval.sh │ └── simple_evals.py ├── llm_ptq │ ├── .gitignore │ ├── README.md │ ├── config │ │ └── megatron_quantization.yaml │ ├── example_utils.py │ ├── fsdp2.yaml │ ├── hf_ptq.py │ ├── multinode_ptq.py │ ├── notebooks │ │ ├── 1_FP4-FP8_PTQ_Min-Max_Calibration.ipynb │ │ ├── 2_PTQ_AWQ_Calibration.ipynb │ │ └── 3_PTQ_AutoQuantization.ipynb │ ├── requirements-t5.txt │ ├── requirements-whisper.txt │ ├── requirements.txt │ ├── run_tensorrt_llm.py │ ├── scripts │ │ ├── huggingface_example.sh │ │ └── parser.sh │ └── vlm_utils.py ├── llm_qat │ ├── README.md │ ├── accelerate_config │ │ ├── ddp.yaml │ │ ├── deepspeed.yaml │ │ ├── fsdp1.yaml │ │ └── fsdp2.yaml │ ├── export.py │ ├── launch.sh │ ├── llama_factory │ │ ├── README.md │ │ ├── data │ │ │ └── dataset_info.json │ │ ├── launch_llamafactory.sh │ │ ├── llama_config.yaml │ │ ├── llama_factory.py │ │ └── llamafactory_cli.py │ ├── main.py │ ├── notebooks │ │ ├── QAT_QAD_Walkthrough.ipynb │ │ └── requirements.txt │ ├── requirements.txt │ ├── simple_qat_train.py │ └── utils.py ├── llm_sparsity │ ├── .gitignore │ ├── README.md │ ├── data_prep.py │ ├── eval.py │ ├── export_trtllm_ckpt.py │ ├── finetune.py │ ├── hf_pts.py │ ├── launch_finetune.sh │ ├── requirements.txt │ └── utils.py ├── model_hub │ ├── README.md │ ├── run_llama_fp8_sglang.py │ ├── run_llama_fp8_trtllm.py │ └── run_llama_fp8_vllm.py ├── nemo_run │ ├── common │ │ ├── in_memory_mmlu.py │ │ ├── llama_chat_template.txt │ │ ├── process_climbmix.py │ │ ├── process_lima.py │ │ ├── process_openscience.py │ │ └── utils.py │ ├── prune_distill │ │ ├── README.md │ │ └── nemo_prune_kd_flow.py │ └── qat │ │ ├── ADVANCED.md │ │ ├── README.md │ │ └── nemo_qat_flow.py ├── onnx_ptq │ ├── README.md │ ├── download_example_onnx.py │ ├── evaluate.py │ ├── evaluation.py │ ├── image_prep.py │ ├── llm_export.py │ ├── requirements.txt │ └── torch_quant_to_onnx.py ├── pruning │ ├── README.md │ └── cifar_resnet.ipynb ├── specdec_bench │ ├── README.md │ ├── run.py │ └── specdec_bench │ │ ├── __init__.py │ │ ├── datasets │ │ ├── __init__.py │ │ ├── base.py │ │ ├── base_hf.py │ │ ├── mtbench.py │ │ └── random_token.py │ │ ├── metrics │ │ ├── __init__.py │ │ ├── aa_timing.py │ │ ├── acceptance_rate.py │ │ ├── base.py │ │ ├── mtbench.py │ │ └── timing.py │ │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── sglang.py │ │ ├── trtllm_torch_api.py │ │ └── vllm.py │ │ ├── runners │ │ ├── __init__.py │ │ ├── base.py │ │ └── simple.py │ │ └── utils.py ├── speculative_decoding │ ├── .gitignore │ ├── README.md │ ├── SLURM_prepare_data.md │ ├── collect_hidden_states │ │ ├── __init__.py │ │ ├── compute_hidden_states_hf.py │ │ ├── compute_hidden_states_trtllm.py │ │ ├── run_hf_compute_hiddens.sh │ │ ├── run_hf_compute_hiddens_dp.sh │ │ ├── run_send_conversations.sh │ │ ├── run_trtllm_compute_hiddens.sh │ │ ├── run_trtllm_compute_hiddens_dp.sh │ │ ├── sample_hidden_states.py │ │ ├── send_conversations_for_hiddens.py │ │ └── slurm_dump.sh │ ├── distributed_generate │ │ ├── launch.sh │ │ ├── sharding_utils.py │ │ └── worker.sh │ ├── eagle_config.json │ ├── eagle_utils.py │ ├── example.ipynb │ ├── launch_train.sh │ ├── main.py │ ├── medusa_utils.py │ ├── prepare_input_conversations │ │ ├── __init__.py │ │ ├── add_daring_anteater.py │ │ ├── add_mtbench.py │ │ ├── add_sharegpt.py │ │ ├── add_ultrachat.py │ │ ├── example_make_prompt_dataset.sh │ │ └── utils.py │ ├── requirements.txt │ ├── scripts │ │ ├── ar_validate.py │ │ ├── calibrate_draft_vocab.py │ │ ├── convert_to_vllm_ckpt.py │ │ ├── export_hf_checkpoint.py │ │ └── server_generate.py │ └── train_eagle3_and_export.sh ├── vllm_serve │ ├── Dockerfile │ ├── README.md │ ├── fakequant_worker.py │ └── vllm_serve_fakequant.py ├── vlm_ptq │ ├── .gitignore │ ├── README.md │ ├── requirements-vila.txt │ └── scripts │ │ └── huggingface_example.sh └── windows │ ├── Benchmark.md │ ├── README.md │ ├── accuracy_benchmark │ ├── GenAI_API_changes_0.6.png │ ├── README.md │ ├── kl_divergence_metrics │ │ ├── README.md │ │ ├── compute_kl_divergence.py │ │ └── requirements.txt │ ├── mmlu_benchmark.py │ ├── modeling.py │ ├── perplexity_metrics │ │ ├── README.md │ │ ├── perplexity_metrics.py │ │ ├── requirements.txt │ │ └── run_perplexity.py │ ├── quantization_utils.py │ ├── requirements.txt │ └── trtllm_utils.py │ └── onnx_ptq │ ├── genai_llm │ ├── README.md │ ├── quantize.py │ └── requirements.txt │ ├── sam2 │ ├── README.md │ ├── requirements.txt │ └── sam2_onnx_quantization.py │ └── whisper │ ├── README.md │ ├── demo.wav │ ├── requirements.txt │ ├── whisper_onnx_quantization.py │ └── whisper_optimum_ort_inference.py ├── modelopt ├── __init__.py ├── deploy │ ├── __init__.py │ └── llm │ │ ├── __init__.py │ │ └── generate.py ├── onnx │ ├── __init__.py │ ├── autocast │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── convert.py │ │ ├── graphsanitizer.py │ │ ├── logging_config.py │ │ ├── nodeclassifier.py │ │ ├── precisionconverter.py │ │ ├── referencerunner.py │ │ └── utils.py │ ├── export │ │ ├── __init__.py │ │ ├── base_exporter.py │ │ ├── fp8_exporter.py │ │ ├── int4_exporter.py │ │ ├── int8_exporter.py │ │ ├── mxfp8_exporter.py │ │ └── nvfp4_exporter.py │ ├── llm_export_utils │ │ ├── __init__.py │ │ ├── export_utils.py │ │ ├── quantization_utils.py │ │ └── surgeon_utils.py │ ├── logging_config.py │ ├── op_types.py │ ├── quantization │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── calib_utils.py │ │ ├── extensions.py │ │ ├── fp8.py │ │ ├── graph_utils.py │ │ ├── gs_patching.py │ │ ├── int4.py │ │ ├── int8.py │ │ ├── operators.py │ │ ├── ort_patching.py │ │ ├── ort_utils.py │ │ ├── partitioning.py │ │ ├── qdq_utils.py │ │ ├── quant_utils.py │ │ ├── quantize.py │ │ └── src │ │ │ └── modelopt_round_and_pack_ext.cpp │ ├── trt_utils.py │ └── utils.py └── torch │ ├── __init__.py │ ├── _deploy │ ├── __init__.py │ ├── _runtime │ │ ├── __init__.py │ │ ├── common.py │ │ ├── ort_client.py │ │ ├── registry.py │ │ ├── runtime_client.py │ │ ├── tensorrt │ │ │ ├── constants.py │ │ │ ├── engine_builder.py │ │ │ ├── hw_param_config.py │ │ │ ├── layerwise_profiling.py │ │ │ ├── parse_trtexec_log.py │ │ │ └── tensorrt_utils.py │ │ └── trt_client.py │ ├── compilation.py │ ├── device_model.py │ ├── profiling.py │ └── utils │ │ ├── __init__.py │ │ ├── onnx_optimizer.py │ │ ├── onnx_utils.py │ │ └── torch_onnx.py │ ├── distill │ ├── __init__.py │ ├── config.py │ ├── distillation.py │ ├── distillation_model.py │ ├── loss_balancers.py │ ├── losses.py │ ├── mode.py │ ├── plugins │ │ ├── __init__.py │ │ ├── huggingface.py │ │ └── megatron.py │ └── registry.py │ ├── export │ ├── __init__.py │ ├── convert_hf_config.py │ ├── distribute.py │ ├── hf_config_map.py │ ├── layer_utils.py │ ├── mcore_config_map.py │ ├── model_config.py │ ├── model_config_export.py │ ├── model_config_utils.py │ ├── model_utils.py │ ├── plugins │ │ ├── __init__.py │ │ ├── hf_spec_export.py │ │ ├── mcore_common.py │ │ ├── mcore_custom.py │ │ ├── mcore_deepseek.py │ │ ├── mcore_gptoss.py │ │ ├── mcore_llama.py │ │ ├── mcore_nemotron.py │ │ ├── mcore_qwen.py │ │ ├── megatron_importer.py │ │ ├── nemo_run.py │ │ ├── vllm_fakequant_hf.py │ │ └── vllm_fakequant_megatron.py │ ├── postprocess.py │ ├── quant_utils.py │ ├── tensorrt_llm_type.py │ ├── tensorrt_llm_utils.py │ ├── transformer_engine.py │ ├── unified_export_hf.py │ └── unified_export_megatron.py │ ├── nas │ ├── __init__.py │ ├── algorithms.py │ ├── autonas.py │ ├── conversion.py │ ├── hparams │ │ ├── __init__.py │ │ ├── concat.py │ │ └── container.py │ ├── modules │ │ ├── __init__.py │ │ ├── container.py │ │ ├── conv.py │ │ ├── linear.py │ │ ├── norm.py │ │ └── utils.py │ ├── patch.py │ ├── plugins │ │ ├── __init__.py │ │ ├── megatron.py │ │ ├── torch.py │ │ ├── transformer_engine.py │ │ └── transformers.py │ ├── registry.py │ ├── search_space.py │ ├── traced_hp.py │ └── utils.py │ ├── opt │ ├── __init__.py │ ├── _hooks.py │ ├── config.py │ ├── conversion.py │ ├── dynamic.py │ ├── hparam.py │ ├── mode.py │ ├── plugins │ │ ├── __init__.py │ │ ├── diffusers.py │ │ ├── huggingface.py │ │ ├── mcore_dist_checkpointing.py │ │ ├── megatron.py │ │ ├── peft.py │ │ └── transformers.py │ ├── searcher.py │ └── utils.py │ ├── peft │ ├── __init__.py │ ├── config.py │ ├── conversion.py │ ├── convert.py │ ├── custom.py │ ├── lora │ │ ├── __init__.py │ │ ├── layer.py │ │ └── plugins │ │ │ ├── __init__.py │ │ │ └── megatron.py │ └── mode.py │ ├── prune │ ├── __init__.py │ ├── fastnas.py │ ├── gradnas.py │ ├── plugins │ │ ├── __init__.py │ │ ├── mcore_minitron.py │ │ └── transformers.py │ └── pruning.py │ ├── quantization │ ├── __init__.py │ ├── algorithms.py │ ├── backends │ │ ├── __init__.py │ │ ├── fp8_per_tensor_gemm.py │ │ ├── gemm_registry.py │ │ ├── nvfp4_gemm.py │ │ └── utils.py │ ├── calib │ │ ├── __init__.py │ │ ├── bias.py │ │ ├── calibrator.py │ │ ├── histogram.py │ │ ├── max.py │ │ └── mse.py │ ├── compress.py │ ├── config.py │ ├── conversion.py │ ├── export_onnx.py │ ├── extensions.py │ ├── mode.py │ ├── model_calib.py │ ├── model_quant.py │ ├── nn │ │ ├── __init__.py │ │ ├── functional.py │ │ └── modules │ │ │ ├── __init__.py │ │ │ ├── quant_activations.py │ │ │ ├── quant_batchnorm.py │ │ │ ├── quant_conv.py │ │ │ ├── quant_instancenorm.py │ │ │ ├── quant_linear.py │ │ │ ├── quant_module.py │ │ │ ├── quant_pooling.py │ │ │ ├── quant_rnn.py │ │ │ └── tensor_quantizer.py │ ├── plugins │ │ ├── __init__.py │ │ ├── accelerate.py │ │ ├── apex.py │ │ ├── attention.py │ │ ├── custom.py │ │ ├── diffusers.py │ │ ├── fairscale.py │ │ ├── huggingface.py │ │ ├── megatron.py │ │ ├── peft.py │ │ ├── pytorch_geometric.py │ │ ├── transformer_engine.py │ │ ├── transformers.py │ │ ├── transformers_trainer.py │ │ ├── trl.py │ │ └── vllm.py │ ├── qtensor │ │ ├── __init__.py │ │ ├── base_qtensor.py │ │ ├── fp8_tensor.py │ │ ├── int4_tensor.py │ │ ├── int8_tensor.py │ │ ├── mxfp4_tensor.py │ │ ├── nf4_tensor.py │ │ └── nvfp4_tensor.py │ ├── src │ │ ├── tensor_quant.cpp │ │ ├── tensor_quant.h │ │ ├── tensor_quant_gpu.cu │ │ ├── tensor_quant_gpu_fp8.cu │ │ ├── tensor_quant_mx.cu │ │ └── tensor_quant_mx.h │ ├── tensor_quant.py │ ├── triton │ │ ├── __init__.py │ │ └── fp4_kernel.py │ └── utils.py │ ├── sparsity │ ├── __init__.py │ └── weight_sparsity │ │ ├── __init__.py │ │ ├── config.py │ │ ├── magnitude.py │ │ ├── mode.py │ │ ├── module.py │ │ ├── plugins │ │ ├── __init__.py │ │ └── megatron.py │ │ ├── searcher.py │ │ ├── sparsegpt.py │ │ └── sparsification.py │ ├── speculative │ ├── __init__.py │ ├── config.py │ ├── eagle │ │ ├── __init__.py │ │ ├── conversion.py │ │ ├── default_config.py │ │ ├── eagle_model.py │ │ └── utils.py │ ├── medusa │ │ ├── __init__.py │ │ ├── conversion.py │ │ └── medusa_model.py │ ├── mode.py │ ├── plugins │ │ ├── __init__.py │ │ ├── megatron_eagle.py │ │ ├── megatron_medusa.py │ │ └── transformers.py │ ├── speculative_decoding.py │ └── utils.py │ ├── trace │ ├── __init__.py │ ├── analyzer.py │ ├── modules │ │ ├── __init__.py │ │ ├── concat.py │ │ └── nn.py │ ├── plugins │ │ ├── __init__.py │ │ └── transformers.py │ ├── symbols.py │ └── tracer.py │ └── utils │ ├── __init__.py │ ├── _pytree.py │ ├── cpp_extension.py │ ├── dataset_utils.py │ ├── distributed.py │ ├── graph.py │ ├── image_processor.py │ ├── import_utils.py │ ├── list.py │ ├── logging.py │ ├── memory_monitor.py │ ├── network.py │ ├── perf.py │ ├── plugins │ ├── __init__.py │ ├── megatron_generate.py │ ├── megatron_mmlu.py │ └── megatron_preprocess_data.py │ ├── random.py │ ├── regex.py │ ├── speech_dataset_utils.py │ ├── tensor.py │ └── vlm_dataset_utils.py ├── pyproject.toml ├── setup.py ├── tests ├── _test_utils │ ├── deploy_utils.py │ ├── examples │ │ ├── llm_ptq_utils.py │ │ ├── models.py │ │ ├── onnx_ptq │ │ │ └── aggregate_results.py │ │ └── run_command.py │ ├── import_helper.py │ ├── onnx │ │ ├── autocast │ │ │ └── utils.py │ │ ├── lib_test_models.py │ │ └── quantization │ │ │ └── utils.py │ └── torch │ │ ├── deploy │ │ ├── device_model.py │ │ ├── lib_test_models.py │ │ └── runtime.py │ │ ├── diffusers_models.py │ │ ├── distributed │ │ ├── fsdp_test.py │ │ └── utils.py │ │ ├── export │ │ └── utils.py │ │ ├── megatron │ │ ├── models.py │ │ └── utils.py │ │ ├── misc.py │ │ ├── nas_prune │ │ └── models.py │ │ ├── opt │ │ └── utils.py │ │ ├── quantization │ │ ├── models.py │ │ ├── onnx_export.py │ │ ├── quant_utils.py │ │ ├── quantize_common.py │ │ ├── tensor_quant_common.py │ │ └── tensor_quantizer_common.py │ │ ├── sparsity │ │ └── utils.py │ │ ├── transformers_models.py │ │ └── vision_models.py ├── conftest.py ├── examples │ ├── README.md │ ├── cnn_qat │ │ └── test_resnet50.py │ ├── conftest.py │ ├── diffusers │ │ ├── test_cache_diffusion.py │ │ └── test_diffusers.py │ ├── gpt_oss │ │ └── test_gpt_oss_qat.py │ ├── llm_autodeploy │ │ └── test_llama.py │ ├── llm_distill │ │ └── test_llm_distill.py │ ├── llm_eval │ │ └── test_llm_eval.py │ ├── llm_ptq │ │ ├── test_deploy.py │ │ └── test_llm_ptq.py │ ├── llm_qat │ │ └── test_llm_qat.py │ ├── llm_sparsity │ │ └── test_llama_sparsify.py │ ├── onnx_ptq │ │ ├── test_llm_export.py │ │ └── test_torch_quant_to_onnx.py │ ├── speculative_decoding │ │ ├── conftest.py │ │ ├── test_eagle.py │ │ └── test_medusa.py │ ├── test_onnx_ptq.sh │ └── vlm_ptq │ │ └── test_qwen_vl.py ├── gpu │ ├── _extensions │ │ ├── test_onnx_extensions.py │ │ └── test_torch_extensions.py │ ├── onnx │ │ ├── test_concat_elim.py │ │ ├── test_ort_patching.py │ │ ├── test_plugin.py │ │ ├── test_qdq_utils_fp8.py │ │ ├── test_quantize_fp8.py │ │ ├── test_quantize_onnx_torch_int4_awq.py │ │ └── test_simplify.py │ └── torch │ │ ├── conftest.py │ │ ├── deploy │ │ ├── _runtime │ │ │ └── test_trt_client.py │ │ └── test_gpu_deploy_benchmark.py │ │ ├── export │ │ ├── test_export.py │ │ ├── test_export_weight_gpu.py │ │ ├── test_fsdp2_export.py │ │ ├── test_quant_utils.py │ │ ├── test_unified_export_megatron.py │ │ ├── test_unified_hf_export_and_check_safetensors.py │ │ ├── test_vllm_fakequant_hf_export.py │ │ └── test_vllm_fakequant_megatron_export.py │ │ ├── nas │ │ ├── plugins │ │ │ ├── test_megatron_gpt_dynamic_modules.py │ │ │ └── test_megatron_mamba_dynamic_modules.py │ │ ├── test_distributed_model.py │ │ └── test_search_space_with_vision_models.py │ │ ├── opt │ │ ├── plugins │ │ │ ├── test_megatron_chaining.py │ │ │ └── test_transformers_multi_process.py │ │ └── test_fsdp_save_restore.py │ │ ├── peft │ │ └── test_megatron_peft.py │ │ ├── prune │ │ └── plugins │ │ │ ├── test_mcore_gpt_minitron_pruning.py │ │ │ └── test_mcore_mamba_minitron_pruning.py │ │ ├── quantization │ │ ├── backends │ │ │ ├── test_fp8_per_tensor_gemm.py │ │ │ ├── test_gemm_common.py │ │ │ ├── test_gemm_registry.py │ │ │ └── test_nvfp4_gemm.py │ │ ├── conftest.py │ │ ├── plugins │ │ │ ├── test_accelerate_gpu.py │ │ │ ├── test_apex.py │ │ │ ├── test_megatron.py │ │ │ ├── test_transformer_engine.py │ │ │ └── test_transformers_tp.py │ │ ├── test_deepspeed.py │ │ ├── test_fsdp.py │ │ ├── test_fsdp2.py │ │ ├── test_hadamard.py │ │ ├── test_nvfp4_onnx_export.py │ │ ├── test_onnx_export_cuda.py │ │ ├── test_qtensor_cuda.py │ │ ├── test_quant_rnn_cuda.py │ │ ├── test_quantize_cuda.py │ │ ├── test_quantize_mxformats_cuda.py │ │ ├── test_real_quantize_cuda.py │ │ ├── test_tensor_quant_cuda.py │ │ ├── test_tensor_quantizer_cuda.py │ │ └── test_torch_export.py │ │ ├── sparsity │ │ └── weight_sparsity │ │ │ ├── plugins │ │ │ └── test_megatron_sparsity.py │ │ │ └── test_sparse_fsdp.py │ │ ├── speculative │ │ └── plugins │ │ │ └── test_speculative_megatron_modules.py │ │ └── utils │ │ └── plugins │ │ └── test_utils_megatron.py └── unit │ ├── deploy │ └── test_deploy_dummy.py │ ├── onnx │ ├── autocast │ │ ├── test_autocast.py │ │ ├── test_graphsanitizer.py │ │ ├── test_nodeclassifier.py │ │ ├── test_precisionconverter.py │ │ └── test_referencerunner.py │ ├── test_convtranspose_qdq.py │ ├── test_onnx_utils.py │ ├── test_partitioning.py │ ├── test_qdq_rules_int8.py │ ├── test_qdq_utils.py │ ├── test_quant_utils.py │ ├── test_quantize_int8.py │ └── test_quantize_zint4.py │ └── torch │ ├── conftest.py │ ├── deploy │ ├── _runtime │ │ └── tensorrt │ │ │ ├── test_engine_builder.py │ │ │ ├── test_layerwise_profiling.py │ │ │ └── test_tensorrt_utils.py │ ├── test_cpu_deploy_benchmark.py │ ├── test_runtime_config.py │ └── utils │ │ └── test_torch_onnx_utils.py │ ├── distill │ └── test_distill.py │ ├── export │ ├── test_export_weight.py │ └── test_get_quantization.py │ ├── nas │ ├── modules │ │ ├── test_container.py │ │ ├── test_conv.py │ │ ├── test_linear.py │ │ ├── test_mod_utils.py │ │ └── test_norm.py │ ├── plugins │ │ ├── test_dynamic_hf_attention.py │ │ ├── test_hf_nas_save_restore.py │ │ └── test_torch_hooks.py │ ├── test_evaluate_constraints.py │ ├── test_full_algorithms_vision.py │ ├── test_latency_interpolator.py │ ├── test_nas.py │ ├── test_nas_utils.py │ ├── test_registry.py │ ├── test_search_space.py │ ├── test_search_space_with_example_models.py │ └── test_search_space_with_torchvision.py │ ├── opt │ ├── plugins │ │ ├── test_diffusers_save_load.py │ │ ├── test_hf_patching.py │ │ ├── test_peft_save_load.py │ │ └── test_transformers_save_load.py │ ├── test_chaining.py │ ├── test_config.py │ ├── test_dynamic.py │ └── test_mode_registry.py │ ├── prune │ ├── test_algorithms.py │ ├── test_fastnas_conversion.py │ └── test_gradnas.py │ ├── quantization │ ├── plugins │ │ ├── test_accelerate.py │ │ ├── test_attention_quant.py │ │ ├── test_huggingface.py │ │ ├── test_peft.py │ │ └── test_pytorch_geometric_plugin.py │ ├── test_affine_quant.py │ ├── test_autoquant.py │ ├── test_calib.py │ ├── test_calibrator.py │ ├── test_config_validation.py │ ├── test_dist.py │ ├── test_mode.py │ ├── test_module_registry.py │ ├── test_mse_calibrator.py │ ├── test_onnx_export_cpu.py │ ├── test_print.py │ ├── test_quant_activations.py │ ├── test_quant_batchnorm.py │ ├── test_quant_conv.py │ ├── test_quant_instancenorm.py │ ├── test_quant_linear.py │ ├── test_quant_pooling.py │ ├── test_quant_rnn.py │ ├── test_quantize_cpu.py │ ├── test_quantize_replace.py │ ├── test_tensor_quant_cpu.py │ ├── test_tensor_quantizer_cpu.py │ └── test_utils.py │ ├── sparsity │ └── weight_sparsity │ │ └── test_sparsify.py │ ├── speculative │ └── plugins │ │ └── test_hf_speculative.py │ ├── trace │ ├── plugins │ │ └── test_transformers_attention_symbols.py │ ├── test_analyzer.py │ ├── test_concat.py │ ├── test_model.py │ ├── test_nn.py │ ├── test_symbol.py │ └── test_tracer.py │ └── utils │ ├── test_dataset_utils.py │ ├── test_megatron_preprocess_data.py │ ├── test_network.py │ ├── test_pytree.py │ ├── test_regex.py │ └── test_tensor.py └── tox.ini /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1_bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/ISSUE_TEMPLATE/1_bug_report.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2_feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/ISSUE_TEMPLATE/2_feature_request.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3_question.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/ISSUE_TEMPLATE/3_question.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/get_system_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/ISSUE_TEMPLATE/get_system_info.py -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/codecov.yml -------------------------------------------------------------------------------- /.github/copy-pr-bot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/copy-pr-bot.yaml -------------------------------------------------------------------------------- /.github/workflows/_wait_for_checks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/_wait_for_checks.yml -------------------------------------------------------------------------------- /.github/workflows/close_inactive_issues_pr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/close_inactive_issues_pr.yml -------------------------------------------------------------------------------- /.github/workflows/code_quality.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/code_quality.yml -------------------------------------------------------------------------------- /.github/workflows/example_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/example_tests.yml -------------------------------------------------------------------------------- /.github/workflows/gpu_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/gpu_tests.yml -------------------------------------------------------------------------------- /.github/workflows/pages.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/pages.yml -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.github/workflows/unit_tests.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitlab/.gitlab-ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.gitlab/.gitlab-ci.yml -------------------------------------------------------------------------------- /.gitlab/release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.gitlab/release.yml -------------------------------------------------------------------------------- /.gitlab/tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.gitlab/tests.yml -------------------------------------------------------------------------------- /.markdownlint-cli2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.markdownlint-cli2.yaml -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.vscode/extensions.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CHANGELOG-Windows.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/CHANGELOG-Windows.rst -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/CHANGELOG.rst -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSE_HEADER: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/LICENSE_HEADER -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/SECURITY.md -------------------------------------------------------------------------------- /docs/source/_ext/modelopt_autodoc_pydantic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/_ext/modelopt_autodoc_pydantic.py -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/_static/custom.css -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/module.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/_templates/autosummary/module.rst -------------------------------------------------------------------------------- /docs/source/assets/model-optimizer-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/assets/model-optimizer-banner.png -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/deployment/1_tensorrt_llm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/deployment/1_tensorrt_llm.rst -------------------------------------------------------------------------------- /docs/source/deployment/2_directml.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/deployment/2_directml.rst -------------------------------------------------------------------------------- /docs/source/deployment/3_unified_hf.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/deployment/3_unified_hf.rst -------------------------------------------------------------------------------- /docs/source/examples/0_all_examples.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/examples/0_all_examples.rst -------------------------------------------------------------------------------- /docs/source/getting_started/1_overview.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/getting_started/1_overview.rst -------------------------------------------------------------------------------- /docs/source/getting_started/2_installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/getting_started/2_installation.rst -------------------------------------------------------------------------------- /docs/source/getting_started/_installation_for_Linux.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/getting_started/_installation_for_Linux.rst -------------------------------------------------------------------------------- /docs/source/getting_started/windows/_installation_for_Windows.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/getting_started/windows/_installation_for_Windows.rst -------------------------------------------------------------------------------- /docs/source/guides/0_support_matrix.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/0_support_matrix.rst -------------------------------------------------------------------------------- /docs/source/guides/1_quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/1_quantization.rst -------------------------------------------------------------------------------- /docs/source/guides/2_save_load.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/2_save_load.rst -------------------------------------------------------------------------------- /docs/source/guides/3_pruning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/3_pruning.rst -------------------------------------------------------------------------------- /docs/source/guides/4_distillation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/4_distillation.rst -------------------------------------------------------------------------------- /docs/source/guides/5_speculative_decoding.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/5_speculative_decoding.rst -------------------------------------------------------------------------------- /docs/source/guides/6_sparsity.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/6_sparsity.rst -------------------------------------------------------------------------------- /docs/source/guides/7_nas.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/7_nas.rst -------------------------------------------------------------------------------- /docs/source/guides/8_autocast.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/8_autocast.rst -------------------------------------------------------------------------------- /docs/source/guides/_basic_quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/_basic_quantization.rst -------------------------------------------------------------------------------- /docs/source/guides/_choosing_quant_methods.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/_choosing_quant_methods.rst -------------------------------------------------------------------------------- /docs/source/guides/_compress_quantized_models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/_compress_quantized_models.rst -------------------------------------------------------------------------------- /docs/source/guides/_customized_model_quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/_customized_model_quantization.rst -------------------------------------------------------------------------------- /docs/source/guides/_onnx_quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/_onnx_quantization.rst -------------------------------------------------------------------------------- /docs/source/guides/_pytorch_quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/_pytorch_quantization.rst -------------------------------------------------------------------------------- /docs/source/guides/windows_guides/_ONNX_PTQ_guide.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/guides/windows_guides/_ONNX_PTQ_guide.rst -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/reference/0_changelog.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/reference/0_changelog.rst -------------------------------------------------------------------------------- /docs/source/reference/1_modelopt_api.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/reference/1_modelopt_api.rst -------------------------------------------------------------------------------- /docs/source/reference/_changelog_for_Linux.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/reference/_changelog_for_Linux.rst -------------------------------------------------------------------------------- /docs/source/reference/_changelog_for_Windows.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/reference/_changelog_for_Windows.rst -------------------------------------------------------------------------------- /docs/source/support/1_contact.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/support/1_contact.rst -------------------------------------------------------------------------------- /docs/source/support/2_faqs.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/docs/source/support/2_faqs.rst -------------------------------------------------------------------------------- /examples/benchmark.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/benchmark.md -------------------------------------------------------------------------------- /examples/chained_optimizations/.gitignore: -------------------------------------------------------------------------------- 1 | results**/ 2 | -------------------------------------------------------------------------------- /examples/chained_optimizations/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/chained_optimizations/README.md -------------------------------------------------------------------------------- /examples/chained_optimizations/bert_prune_distill_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/chained_optimizations/bert_prune_distill_quantize.py -------------------------------------------------------------------------------- /examples/chained_optimizations/scripts/1_prune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/chained_optimizations/scripts/1_prune.sh -------------------------------------------------------------------------------- /examples/chained_optimizations/scripts/2_int8_quantize.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/chained_optimizations/scripts/2_int8_quantize.sh -------------------------------------------------------------------------------- /examples/chained_optimizations/scripts/3_onnx_export.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/chained_optimizations/scripts/3_onnx_export.sh -------------------------------------------------------------------------------- /examples/cnn_qat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/cnn_qat/README.md -------------------------------------------------------------------------------- /examples/cnn_qat/requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision 2 | -------------------------------------------------------------------------------- /examples/cnn_qat/torchvision_qat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/cnn_qat/torchvision_qat.py -------------------------------------------------------------------------------- /examples/cnn_qat/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/cnn_qat/utils.py -------------------------------------------------------------------------------- /examples/deepseek/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/deepseek/.gitignore -------------------------------------------------------------------------------- /examples/deepseek/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/deepseek/README.md -------------------------------------------------------------------------------- /examples/deepseek/ds_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/deepseek/ds_kernel.py -------------------------------------------------------------------------------- /examples/deepseek/ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/deepseek/ptq.py -------------------------------------------------------------------------------- /examples/deepseek/quantize_fp8_to_nvfp4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/deepseek/quantize_fp8_to_nvfp4.sh -------------------------------------------------------------------------------- /examples/deepseek/quantize_to_nvfp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/deepseek/quantize_to_nvfp4.py -------------------------------------------------------------------------------- /examples/diffusers/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/README.md -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/assets/sdxl_cache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/assets/sdxl_cache.png -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/benchmarks.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/cache_diffusion/cachify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/cache_diffusion/cachify.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/cache_diffusion/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/cache_diffusion/module.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/cache_diffusion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/cache_diffusion/utils.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/example.ipynb -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/pipeline/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/pipeline/config.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/pipeline/deploy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/pipeline/deploy.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/pipeline/models/sd3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/pipeline/models/sd3.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/pipeline/models/sdxl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/pipeline/models/sdxl.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/pipeline/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/pipeline/utils.py -------------------------------------------------------------------------------- /examples/diffusers/cache_diffusion/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/cache_diffusion/requirements.txt -------------------------------------------------------------------------------- /examples/diffusers/eval/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/eval/main.py -------------------------------------------------------------------------------- /examples/diffusers/eval/metrics/imagereward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/eval/metrics/imagereward.py -------------------------------------------------------------------------------- /examples/diffusers/eval/metrics/multimodal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/eval/metrics/multimodal.py -------------------------------------------------------------------------------- /examples/diffusers/eval/requirements.txt: -------------------------------------------------------------------------------- 1 | image-reward 2 | torchmetrics 3 | -------------------------------------------------------------------------------- /examples/diffusers/eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/eval/utils.py -------------------------------------------------------------------------------- /examples/diffusers/quantization/.gitignore: -------------------------------------------------------------------------------- 1 | *.plan 2 | *.png 3 | -------------------------------------------------------------------------------- /examples/diffusers/quantization/assets/xl_base-fp16-sakura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/assets/xl_base-fp16-sakura.png -------------------------------------------------------------------------------- /examples/diffusers/quantization/assets/xl_base-fp16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/assets/xl_base-fp16.png -------------------------------------------------------------------------------- /examples/diffusers/quantization/assets/xl_base-fp8-sakura.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/assets/xl_base-fp8-sakura.png -------------------------------------------------------------------------------- /examples/diffusers/quantization/assets/xl_base-int8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/assets/xl_base-int8.png -------------------------------------------------------------------------------- /examples/diffusers/quantization/build_sdxl_8bit_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/build_sdxl_8bit_engine.sh -------------------------------------------------------------------------------- /examples/diffusers/quantization/calib/plugin_calib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/calib/plugin_calib.py -------------------------------------------------------------------------------- /examples/diffusers/quantization/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/config.py -------------------------------------------------------------------------------- /examples/diffusers/quantization/diffusion_trt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/diffusion_trt.py -------------------------------------------------------------------------------- /examples/diffusers/quantization/onnx_utils/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/onnx_utils/export.py -------------------------------------------------------------------------------- /examples/diffusers/quantization/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/quantize.py -------------------------------------------------------------------------------- /examples/diffusers/quantization/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/requirements.txt -------------------------------------------------------------------------------- /examples/diffusers/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/diffusers/quantization/utils.py -------------------------------------------------------------------------------- /examples/gpt-oss/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/README.md -------------------------------------------------------------------------------- /examples/gpt-oss/configs/sft_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/configs/sft_full.yaml -------------------------------------------------------------------------------- /examples/gpt-oss/configs/sft_lora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/configs/sft_lora.yaml -------------------------------------------------------------------------------- /examples/gpt-oss/configs/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/configs/zero3.yaml -------------------------------------------------------------------------------- /examples/gpt-oss/convert_oai_mxfp4_weight_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/convert_oai_mxfp4_weight_only.py -------------------------------------------------------------------------------- /examples/gpt-oss/qat-finetune-transformers.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/qat-finetune-transformers.ipynb -------------------------------------------------------------------------------- /examples/gpt-oss/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/requirements.txt -------------------------------------------------------------------------------- /examples/gpt-oss/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/sft.py -------------------------------------------------------------------------------- /examples/gpt-oss/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/gpt-oss/utils.py -------------------------------------------------------------------------------- /examples/llm_autodeploy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_autodeploy/README.md -------------------------------------------------------------------------------- /examples/llm_autodeploy/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_autodeploy/api_client.py -------------------------------------------------------------------------------- /examples/llm_autodeploy/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_autodeploy/api_server.py -------------------------------------------------------------------------------- /examples/llm_autodeploy/run_auto_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_autodeploy/run_auto_quantize.py -------------------------------------------------------------------------------- /examples/llm_autodeploy/scripts/run_auto_quant_and_deploy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_autodeploy/scripts/run_auto_quant_and_deploy.sh -------------------------------------------------------------------------------- /examples/llm_distill/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_distill/README.md -------------------------------------------------------------------------------- /examples/llm_distill/accelerate_config/fsdp2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_distill/accelerate_config/fsdp2.yaml -------------------------------------------------------------------------------- /examples/llm_distill/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_distill/main.py -------------------------------------------------------------------------------- /examples/llm_distill/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_distill/requirements.txt -------------------------------------------------------------------------------- /examples/llm_eval/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/.gitignore -------------------------------------------------------------------------------- /examples/llm_eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/README.md -------------------------------------------------------------------------------- /examples/llm_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/__init__.py -------------------------------------------------------------------------------- /examples/llm_eval/gen_model_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/gen_model_answer.py -------------------------------------------------------------------------------- /examples/llm_eval/livecodebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/livecodebench.py -------------------------------------------------------------------------------- /examples/llm_eval/lm_eval_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/lm_eval_hf.py -------------------------------------------------------------------------------- /examples/llm_eval/lm_eval_tensorrt_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/lm_eval_tensorrt_llm.py -------------------------------------------------------------------------------- /examples/llm_eval/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/mmlu.py -------------------------------------------------------------------------------- /examples/llm_eval/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/modeling.py -------------------------------------------------------------------------------- /examples/llm_eval/quantization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/quantization_utils.py -------------------------------------------------------------------------------- /examples/llm_eval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/requirements.txt -------------------------------------------------------------------------------- /examples/llm_eval/run_fastchat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/run_fastchat.sh -------------------------------------------------------------------------------- /examples/llm_eval/run_livecodebench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/run_livecodebench.sh -------------------------------------------------------------------------------- /examples/llm_eval/run_lm_eval_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/run_lm_eval_vllm.sh -------------------------------------------------------------------------------- /examples/llm_eval/run_simple_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/run_simple_eval.sh -------------------------------------------------------------------------------- /examples/llm_eval/simple_evals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_eval/simple_evals.py -------------------------------------------------------------------------------- /examples/llm_ptq/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/.gitignore -------------------------------------------------------------------------------- /examples/llm_ptq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/README.md -------------------------------------------------------------------------------- /examples/llm_ptq/config/megatron_quantization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/config/megatron_quantization.yaml -------------------------------------------------------------------------------- /examples/llm_ptq/example_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/example_utils.py -------------------------------------------------------------------------------- /examples/llm_ptq/fsdp2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/fsdp2.yaml -------------------------------------------------------------------------------- /examples/llm_ptq/hf_ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/hf_ptq.py -------------------------------------------------------------------------------- /examples/llm_ptq/multinode_ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/multinode_ptq.py -------------------------------------------------------------------------------- /examples/llm_ptq/notebooks/2_PTQ_AWQ_Calibration.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/notebooks/2_PTQ_AWQ_Calibration.ipynb -------------------------------------------------------------------------------- /examples/llm_ptq/notebooks/3_PTQ_AutoQuantization.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/notebooks/3_PTQ_AutoQuantization.ipynb -------------------------------------------------------------------------------- /examples/llm_ptq/requirements-t5.txt: -------------------------------------------------------------------------------- 1 | transformers==4.48.0 2 | -------------------------------------------------------------------------------- /examples/llm_ptq/requirements-whisper.txt: -------------------------------------------------------------------------------- 1 | librosa 2 | soundfile 3 | -------------------------------------------------------------------------------- /examples/llm_ptq/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/requirements.txt -------------------------------------------------------------------------------- /examples/llm_ptq/run_tensorrt_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/run_tensorrt_llm.py -------------------------------------------------------------------------------- /examples/llm_ptq/scripts/huggingface_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/scripts/huggingface_example.sh -------------------------------------------------------------------------------- /examples/llm_ptq/scripts/parser.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/scripts/parser.sh -------------------------------------------------------------------------------- /examples/llm_ptq/vlm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_ptq/vlm_utils.py -------------------------------------------------------------------------------- /examples/llm_qat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/README.md -------------------------------------------------------------------------------- /examples/llm_qat/accelerate_config/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/accelerate_config/ddp.yaml -------------------------------------------------------------------------------- /examples/llm_qat/accelerate_config/deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/accelerate_config/deepspeed.yaml -------------------------------------------------------------------------------- /examples/llm_qat/accelerate_config/fsdp1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/accelerate_config/fsdp1.yaml -------------------------------------------------------------------------------- /examples/llm_qat/accelerate_config/fsdp2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/accelerate_config/fsdp2.yaml -------------------------------------------------------------------------------- /examples/llm_qat/export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/export.py -------------------------------------------------------------------------------- /examples/llm_qat/launch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/launch.sh -------------------------------------------------------------------------------- /examples/llm_qat/llama_factory/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/llama_factory/README.md -------------------------------------------------------------------------------- /examples/llm_qat/llama_factory/data/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/llama_factory/data/dataset_info.json -------------------------------------------------------------------------------- /examples/llm_qat/llama_factory/launch_llamafactory.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/llama_factory/launch_llamafactory.sh -------------------------------------------------------------------------------- /examples/llm_qat/llama_factory/llama_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/llama_factory/llama_config.yaml -------------------------------------------------------------------------------- /examples/llm_qat/llama_factory/llama_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/llama_factory/llama_factory.py -------------------------------------------------------------------------------- /examples/llm_qat/llama_factory/llamafactory_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/llama_factory/llamafactory_cli.py -------------------------------------------------------------------------------- /examples/llm_qat/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/main.py -------------------------------------------------------------------------------- /examples/llm_qat/notebooks/QAT_QAD_Walkthrough.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/notebooks/QAT_QAD_Walkthrough.ipynb -------------------------------------------------------------------------------- /examples/llm_qat/notebooks/requirements.txt: -------------------------------------------------------------------------------- 1 | ipywidgets 2 | nvidia-modelopt[all] 3 | trl 4 | -------------------------------------------------------------------------------- /examples/llm_qat/requirements.txt: -------------------------------------------------------------------------------- 1 | flash-attn 2 | peft 3 | py7zr 4 | sentencepiece>=0.2.0 5 | tensorboardX 6 | -------------------------------------------------------------------------------- /examples/llm_qat/simple_qat_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/simple_qat_train.py -------------------------------------------------------------------------------- /examples/llm_qat/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_qat/utils.py -------------------------------------------------------------------------------- /examples/llm_sparsity/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | -------------------------------------------------------------------------------- /examples/llm_sparsity/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/README.md -------------------------------------------------------------------------------- /examples/llm_sparsity/data_prep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/data_prep.py -------------------------------------------------------------------------------- /examples/llm_sparsity/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/eval.py -------------------------------------------------------------------------------- /examples/llm_sparsity/export_trtllm_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/export_trtllm_ckpt.py -------------------------------------------------------------------------------- /examples/llm_sparsity/finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/finetune.py -------------------------------------------------------------------------------- /examples/llm_sparsity/hf_pts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/hf_pts.py -------------------------------------------------------------------------------- /examples/llm_sparsity/launch_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/launch_finetune.sh -------------------------------------------------------------------------------- /examples/llm_sparsity/requirements.txt: -------------------------------------------------------------------------------- 1 | flash-attn 2 | sentencepiece>=0.2.0 3 | tensorboardX 4 | -------------------------------------------------------------------------------- /examples/llm_sparsity/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/llm_sparsity/utils.py -------------------------------------------------------------------------------- /examples/model_hub/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/model_hub/README.md -------------------------------------------------------------------------------- /examples/model_hub/run_llama_fp8_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/model_hub/run_llama_fp8_sglang.py -------------------------------------------------------------------------------- /examples/model_hub/run_llama_fp8_trtllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/model_hub/run_llama_fp8_trtllm.py -------------------------------------------------------------------------------- /examples/model_hub/run_llama_fp8_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/model_hub/run_llama_fp8_vllm.py -------------------------------------------------------------------------------- /examples/nemo_run/common/in_memory_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/common/in_memory_mmlu.py -------------------------------------------------------------------------------- /examples/nemo_run/common/llama_chat_template.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/common/llama_chat_template.txt -------------------------------------------------------------------------------- /examples/nemo_run/common/process_climbmix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/common/process_climbmix.py -------------------------------------------------------------------------------- /examples/nemo_run/common/process_lima.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/common/process_lima.py -------------------------------------------------------------------------------- /examples/nemo_run/common/process_openscience.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/common/process_openscience.py -------------------------------------------------------------------------------- /examples/nemo_run/common/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/common/utils.py -------------------------------------------------------------------------------- /examples/nemo_run/prune_distill/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/prune_distill/README.md -------------------------------------------------------------------------------- /examples/nemo_run/prune_distill/nemo_prune_kd_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/prune_distill/nemo_prune_kd_flow.py -------------------------------------------------------------------------------- /examples/nemo_run/qat/ADVANCED.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/qat/ADVANCED.md -------------------------------------------------------------------------------- /examples/nemo_run/qat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/qat/README.md -------------------------------------------------------------------------------- /examples/nemo_run/qat/nemo_qat_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/nemo_run/qat/nemo_qat_flow.py -------------------------------------------------------------------------------- /examples/onnx_ptq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/README.md -------------------------------------------------------------------------------- /examples/onnx_ptq/download_example_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/download_example_onnx.py -------------------------------------------------------------------------------- /examples/onnx_ptq/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/evaluate.py -------------------------------------------------------------------------------- /examples/onnx_ptq/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/evaluation.py -------------------------------------------------------------------------------- /examples/onnx_ptq/image_prep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/image_prep.py -------------------------------------------------------------------------------- /examples/onnx_ptq/llm_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/llm_export.py -------------------------------------------------------------------------------- /examples/onnx_ptq/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=2.14.4 2 | optimum 3 | sentencepiece 4 | timm 5 | torchvision 6 | -------------------------------------------------------------------------------- /examples/onnx_ptq/torch_quant_to_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/onnx_ptq/torch_quant_to_onnx.py -------------------------------------------------------------------------------- /examples/pruning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/pruning/README.md -------------------------------------------------------------------------------- /examples/pruning/cifar_resnet.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/pruning/cifar_resnet.ipynb -------------------------------------------------------------------------------- /examples/specdec_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/README.md -------------------------------------------------------------------------------- /examples/specdec_bench/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/run.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/__init__.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/datasets/__init__.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/datasets/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/datasets/base.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/datasets/base_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/datasets/base_hf.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/datasets/mtbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/datasets/mtbench.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/datasets/random_token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/datasets/random_token.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/metrics/__init__.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/metrics/aa_timing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/metrics/aa_timing.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/metrics/acceptance_rate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/metrics/acceptance_rate.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/metrics/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/metrics/base.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/metrics/mtbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/metrics/mtbench.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/metrics/timing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/metrics/timing.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/models/__init__.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/models/base.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/models/sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/models/sglang.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/models/trtllm_torch_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/models/trtllm_torch_api.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/models/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/models/vllm.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/runners/__init__.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/runners/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/runners/base.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/runners/simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/runners/simple.py -------------------------------------------------------------------------------- /examples/specdec_bench/specdec_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/specdec_bench/specdec_bench/utils.py -------------------------------------------------------------------------------- /examples/speculative_decoding/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/.gitignore -------------------------------------------------------------------------------- /examples/speculative_decoding/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/README.md -------------------------------------------------------------------------------- /examples/speculative_decoding/SLURM_prepare_data.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/SLURM_prepare_data.md -------------------------------------------------------------------------------- /examples/speculative_decoding/collect_hidden_states/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/collect_hidden_states/__init__.py -------------------------------------------------------------------------------- /examples/speculative_decoding/distributed_generate/launch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/distributed_generate/launch.sh -------------------------------------------------------------------------------- /examples/speculative_decoding/distributed_generate/worker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/distributed_generate/worker.sh -------------------------------------------------------------------------------- /examples/speculative_decoding/eagle_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/eagle_config.json -------------------------------------------------------------------------------- /examples/speculative_decoding/eagle_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/eagle_utils.py -------------------------------------------------------------------------------- /examples/speculative_decoding/example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/example.ipynb -------------------------------------------------------------------------------- /examples/speculative_decoding/launch_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/launch_train.sh -------------------------------------------------------------------------------- /examples/speculative_decoding/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/main.py -------------------------------------------------------------------------------- /examples/speculative_decoding/medusa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/medusa_utils.py -------------------------------------------------------------------------------- /examples/speculative_decoding/requirements.txt: -------------------------------------------------------------------------------- 1 | flash-attn 2 | openai 3 | py7zr 4 | sentencepiece>=0.2.0 5 | tensorboardX 6 | -------------------------------------------------------------------------------- /examples/speculative_decoding/scripts/ar_validate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/scripts/ar_validate.py -------------------------------------------------------------------------------- /examples/speculative_decoding/scripts/calibrate_draft_vocab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/scripts/calibrate_draft_vocab.py -------------------------------------------------------------------------------- /examples/speculative_decoding/scripts/convert_to_vllm_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/scripts/convert_to_vllm_ckpt.py -------------------------------------------------------------------------------- /examples/speculative_decoding/scripts/export_hf_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/scripts/export_hf_checkpoint.py -------------------------------------------------------------------------------- /examples/speculative_decoding/scripts/server_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/scripts/server_generate.py -------------------------------------------------------------------------------- /examples/speculative_decoding/train_eagle3_and_export.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/speculative_decoding/train_eagle3_and_export.sh -------------------------------------------------------------------------------- /examples/vllm_serve/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vllm_serve/Dockerfile -------------------------------------------------------------------------------- /examples/vllm_serve/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vllm_serve/README.md -------------------------------------------------------------------------------- /examples/vllm_serve/fakequant_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vllm_serve/fakequant_worker.py -------------------------------------------------------------------------------- /examples/vllm_serve/vllm_serve_fakequant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vllm_serve/vllm_serve_fakequant.py -------------------------------------------------------------------------------- /examples/vlm_ptq/.gitignore: -------------------------------------------------------------------------------- 1 | saved_models_* 2 | -------------------------------------------------------------------------------- /examples/vlm_ptq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vlm_ptq/README.md -------------------------------------------------------------------------------- /examples/vlm_ptq/requirements-vila.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vlm_ptq/requirements-vila.txt -------------------------------------------------------------------------------- /examples/vlm_ptq/scripts/huggingface_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/vlm_ptq/scripts/huggingface_example.sh -------------------------------------------------------------------------------- /examples/windows/Benchmark.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/Benchmark.md -------------------------------------------------------------------------------- /examples/windows/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/README.md -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/GenAI_API_changes_0.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/GenAI_API_changes_0.6.png -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/README.md -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/mmlu_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/mmlu_benchmark.py -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/modeling.py -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/quantization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/quantization_utils.py -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/requirements.txt -------------------------------------------------------------------------------- /examples/windows/accuracy_benchmark/trtllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/accuracy_benchmark/trtllm_utils.py -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/genai_llm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/genai_llm/README.md -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/genai_llm/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/genai_llm/quantize.py -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/genai_llm/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/genai_llm/requirements.txt -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/sam2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/sam2/README.md -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/sam2/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/sam2/requirements.txt -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/sam2/sam2_onnx_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/sam2/sam2_onnx_quantization.py -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/whisper/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/whisper/README.md -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/whisper/demo.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/whisper/demo.wav -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/whisper/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/whisper/requirements.txt -------------------------------------------------------------------------------- /examples/windows/onnx_ptq/whisper/whisper_onnx_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/examples/windows/onnx_ptq/whisper/whisper_onnx_quantization.py -------------------------------------------------------------------------------- /modelopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/__init__.py -------------------------------------------------------------------------------- /modelopt/deploy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/deploy/__init__.py -------------------------------------------------------------------------------- /modelopt/deploy/llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/deploy/llm/__init__.py -------------------------------------------------------------------------------- /modelopt/deploy/llm/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/deploy/llm/generate.py -------------------------------------------------------------------------------- /modelopt/onnx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/__init__.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/__init__.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/__main__.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/convert.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/graphsanitizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/graphsanitizer.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/logging_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/logging_config.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/nodeclassifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/nodeclassifier.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/precisionconverter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/precisionconverter.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/referencerunner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/referencerunner.py -------------------------------------------------------------------------------- /modelopt/onnx/autocast/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/autocast/utils.py -------------------------------------------------------------------------------- /modelopt/onnx/export/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/__init__.py -------------------------------------------------------------------------------- /modelopt/onnx/export/base_exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/base_exporter.py -------------------------------------------------------------------------------- /modelopt/onnx/export/fp8_exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/fp8_exporter.py -------------------------------------------------------------------------------- /modelopt/onnx/export/int4_exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/int4_exporter.py -------------------------------------------------------------------------------- /modelopt/onnx/export/int8_exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/int8_exporter.py -------------------------------------------------------------------------------- /modelopt/onnx/export/mxfp8_exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/mxfp8_exporter.py -------------------------------------------------------------------------------- /modelopt/onnx/export/nvfp4_exporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/export/nvfp4_exporter.py -------------------------------------------------------------------------------- /modelopt/onnx/llm_export_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/llm_export_utils/__init__.py -------------------------------------------------------------------------------- /modelopt/onnx/llm_export_utils/export_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/llm_export_utils/export_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/llm_export_utils/quantization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/llm_export_utils/quantization_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/llm_export_utils/surgeon_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/llm_export_utils/surgeon_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/logging_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/logging_config.py -------------------------------------------------------------------------------- /modelopt/onnx/op_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/op_types.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/__init__.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/__main__.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/calib_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/calib_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/extensions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/extensions.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/fp8.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/graph_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/graph_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/gs_patching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/gs_patching.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/int4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/int4.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/int8.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/operators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/operators.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/ort_patching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/ort_patching.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/ort_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/ort_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/partitioning.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/qdq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/qdq_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/quant_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/quantize.py -------------------------------------------------------------------------------- /modelopt/onnx/quantization/src/modelopt_round_and_pack_ext.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/quantization/src/modelopt_round_and_pack_ext.cpp -------------------------------------------------------------------------------- /modelopt/onnx/trt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/trt_utils.py -------------------------------------------------------------------------------- /modelopt/onnx/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/onnx/utils.py -------------------------------------------------------------------------------- /modelopt/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/common.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/ort_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/ort_client.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/registry.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/runtime_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/runtime_client.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/tensorrt/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/tensorrt/constants.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/tensorrt/hw_param_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/tensorrt/hw_param_config.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/tensorrt/layerwise_profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/tensorrt/layerwise_profiling.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/tensorrt/parse_trtexec_log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/tensorrt/parse_trtexec_log.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/tensorrt/tensorrt_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/tensorrt/tensorrt_utils.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/_runtime/trt_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/_runtime/trt_client.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/compilation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/compilation.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/device_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/device_model.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/profiling.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/utils/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/utils/onnx_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/utils/onnx_optimizer.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/utils/onnx_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/utils/onnx_utils.py -------------------------------------------------------------------------------- /modelopt/torch/_deploy/utils/torch_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/_deploy/utils/torch_onnx.py -------------------------------------------------------------------------------- /modelopt/torch/distill/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/distill/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/config.py -------------------------------------------------------------------------------- /modelopt/torch/distill/distillation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/distillation.py -------------------------------------------------------------------------------- /modelopt/torch/distill/distillation_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/distillation_model.py -------------------------------------------------------------------------------- /modelopt/torch/distill/loss_balancers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/loss_balancers.py -------------------------------------------------------------------------------- /modelopt/torch/distill/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/losses.py -------------------------------------------------------------------------------- /modelopt/torch/distill/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/mode.py -------------------------------------------------------------------------------- /modelopt/torch/distill/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/distill/plugins/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/plugins/huggingface.py -------------------------------------------------------------------------------- /modelopt/torch/distill/plugins/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/plugins/megatron.py -------------------------------------------------------------------------------- /modelopt/torch/distill/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/distill/registry.py -------------------------------------------------------------------------------- /modelopt/torch/export/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/export/convert_hf_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/convert_hf_config.py -------------------------------------------------------------------------------- /modelopt/torch/export/distribute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/distribute.py -------------------------------------------------------------------------------- /modelopt/torch/export/hf_config_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/hf_config_map.py -------------------------------------------------------------------------------- /modelopt/torch/export/layer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/layer_utils.py -------------------------------------------------------------------------------- /modelopt/torch/export/mcore_config_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/mcore_config_map.py -------------------------------------------------------------------------------- /modelopt/torch/export/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/model_config.py -------------------------------------------------------------------------------- /modelopt/torch/export/model_config_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/model_config_export.py -------------------------------------------------------------------------------- /modelopt/torch/export/model_config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/model_config_utils.py -------------------------------------------------------------------------------- /modelopt/torch/export/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/model_utils.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/hf_spec_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/hf_spec_export.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_common.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_custom.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_deepseek.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_gptoss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_gptoss.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_llama.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_nemotron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_nemotron.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/mcore_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/mcore_qwen.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/megatron_importer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/megatron_importer.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/nemo_run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/nemo_run.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/vllm_fakequant_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/vllm_fakequant_hf.py -------------------------------------------------------------------------------- /modelopt/torch/export/plugins/vllm_fakequant_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/plugins/vllm_fakequant_megatron.py -------------------------------------------------------------------------------- /modelopt/torch/export/postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/postprocess.py -------------------------------------------------------------------------------- /modelopt/torch/export/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/quant_utils.py -------------------------------------------------------------------------------- /modelopt/torch/export/tensorrt_llm_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/tensorrt_llm_type.py -------------------------------------------------------------------------------- /modelopt/torch/export/tensorrt_llm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/tensorrt_llm_utils.py -------------------------------------------------------------------------------- /modelopt/torch/export/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/transformer_engine.py -------------------------------------------------------------------------------- /modelopt/torch/export/unified_export_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/unified_export_hf.py -------------------------------------------------------------------------------- /modelopt/torch/export/unified_export_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/export/unified_export_megatron.py -------------------------------------------------------------------------------- /modelopt/torch/nas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/nas/algorithms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/algorithms.py -------------------------------------------------------------------------------- /modelopt/torch/nas/autonas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/autonas.py -------------------------------------------------------------------------------- /modelopt/torch/nas/conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/conversion.py -------------------------------------------------------------------------------- /modelopt/torch/nas/hparams/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/hparams/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/nas/hparams/concat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/hparams/concat.py -------------------------------------------------------------------------------- /modelopt/torch/nas/hparams/container.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/hparams/container.py -------------------------------------------------------------------------------- /modelopt/torch/nas/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/modules/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/nas/modules/container.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/modules/container.py -------------------------------------------------------------------------------- /modelopt/torch/nas/modules/conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/modules/conv.py -------------------------------------------------------------------------------- /modelopt/torch/nas/modules/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/modules/linear.py -------------------------------------------------------------------------------- /modelopt/torch/nas/modules/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/modules/norm.py -------------------------------------------------------------------------------- /modelopt/torch/nas/modules/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/modules/utils.py -------------------------------------------------------------------------------- /modelopt/torch/nas/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/patch.py -------------------------------------------------------------------------------- /modelopt/torch/nas/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/nas/plugins/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/plugins/megatron.py -------------------------------------------------------------------------------- /modelopt/torch/nas/plugins/torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/plugins/torch.py -------------------------------------------------------------------------------- /modelopt/torch/nas/plugins/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/plugins/transformer_engine.py -------------------------------------------------------------------------------- /modelopt/torch/nas/plugins/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/plugins/transformers.py -------------------------------------------------------------------------------- /modelopt/torch/nas/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/registry.py -------------------------------------------------------------------------------- /modelopt/torch/nas/search_space.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/search_space.py -------------------------------------------------------------------------------- /modelopt/torch/nas/traced_hp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/traced_hp.py -------------------------------------------------------------------------------- /modelopt/torch/nas/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/nas/utils.py -------------------------------------------------------------------------------- /modelopt/torch/opt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/opt/_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/_hooks.py -------------------------------------------------------------------------------- /modelopt/torch/opt/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/config.py -------------------------------------------------------------------------------- /modelopt/torch/opt/conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/conversion.py -------------------------------------------------------------------------------- /modelopt/torch/opt/dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/dynamic.py -------------------------------------------------------------------------------- /modelopt/torch/opt/hparam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/hparam.py -------------------------------------------------------------------------------- /modelopt/torch/opt/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/mode.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/diffusers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/diffusers.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/huggingface.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/mcore_dist_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/megatron.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/peft.py -------------------------------------------------------------------------------- /modelopt/torch/opt/plugins/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/plugins/transformers.py -------------------------------------------------------------------------------- /modelopt/torch/opt/searcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/searcher.py -------------------------------------------------------------------------------- /modelopt/torch/opt/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/opt/utils.py -------------------------------------------------------------------------------- /modelopt/torch/peft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/peft/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/config.py -------------------------------------------------------------------------------- /modelopt/torch/peft/conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/conversion.py -------------------------------------------------------------------------------- /modelopt/torch/peft/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/convert.py -------------------------------------------------------------------------------- /modelopt/torch/peft/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/custom.py -------------------------------------------------------------------------------- /modelopt/torch/peft/lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/lora/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/peft/lora/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/lora/layer.py -------------------------------------------------------------------------------- /modelopt/torch/peft/lora/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/lora/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/peft/lora/plugins/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/lora/plugins/megatron.py -------------------------------------------------------------------------------- /modelopt/torch/peft/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/peft/mode.py -------------------------------------------------------------------------------- /modelopt/torch/prune/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/prune/fastnas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/fastnas.py -------------------------------------------------------------------------------- /modelopt/torch/prune/gradnas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/gradnas.py -------------------------------------------------------------------------------- /modelopt/torch/prune/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/prune/plugins/mcore_minitron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/plugins/mcore_minitron.py -------------------------------------------------------------------------------- /modelopt/torch/prune/plugins/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/plugins/transformers.py -------------------------------------------------------------------------------- /modelopt/torch/prune/pruning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/prune/pruning.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/algorithms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/algorithms.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/backends/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/backends/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/backends/fp8_per_tensor_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/backends/fp8_per_tensor_gemm.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/backends/gemm_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/backends/gemm_registry.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/backends/nvfp4_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/backends/nvfp4_gemm.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/backends/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/backends/utils.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/calib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/calib/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/calib/bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/calib/bias.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/calib/calibrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/calib/calibrator.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/calib/histogram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/calib/histogram.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/calib/max.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/calib/max.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/calib/mse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/calib/mse.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/compress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/compress.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/config.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/conversion.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/export_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/export_onnx.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/extensions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/extensions.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/mode.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/model_calib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/model_calib.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/model_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/model_quant.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/functional.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_activations.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_batchnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_batchnorm.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_conv.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_instancenorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_instancenorm.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_linear.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_module.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_pooling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_pooling.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/quant_rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/quant_rnn.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/nn/modules/tensor_quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/nn/modules/tensor_quantizer.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/accelerate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/accelerate.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/apex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/apex.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/attention.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/custom.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/diffusers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/diffusers.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/fairscale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/fairscale.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/huggingface.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/megatron.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/peft.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/pytorch_geometric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/pytorch_geometric.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/transformer_engine.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/transformers.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/transformers_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/transformers_trainer.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/trl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/trl.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/plugins/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/plugins/vllm.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/base_qtensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/base_qtensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/fp8_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/fp8_tensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/int4_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/int4_tensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/int8_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/int8_tensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/mxfp4_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/mxfp4_tensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/nf4_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/nf4_tensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/qtensor/nvfp4_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/qtensor/nvfp4_tensor.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/src/tensor_quant.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/src/tensor_quant.cpp -------------------------------------------------------------------------------- /modelopt/torch/quantization/src/tensor_quant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/src/tensor_quant.h -------------------------------------------------------------------------------- /modelopt/torch/quantization/src/tensor_quant_gpu.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/src/tensor_quant_gpu.cu -------------------------------------------------------------------------------- /modelopt/torch/quantization/src/tensor_quant_gpu_fp8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/src/tensor_quant_gpu_fp8.cu -------------------------------------------------------------------------------- /modelopt/torch/quantization/src/tensor_quant_mx.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/src/tensor_quant_mx.cu -------------------------------------------------------------------------------- /modelopt/torch/quantization/src/tensor_quant_mx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/src/tensor_quant_mx.h -------------------------------------------------------------------------------- /modelopt/torch/quantization/tensor_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/tensor_quant.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/triton/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/triton/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/triton/fp4_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/triton/fp4_kernel.py -------------------------------------------------------------------------------- /modelopt/torch/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/quantization/utils.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/config.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/magnitude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/magnitude.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/mode.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/module.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/plugins/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/plugins/megatron.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/searcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/searcher.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/sparsegpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/sparsegpt.py -------------------------------------------------------------------------------- /modelopt/torch/sparsity/weight_sparsity/sparsification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/sparsity/weight_sparsity/sparsification.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/config.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/eagle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/eagle/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/eagle/conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/eagle/conversion.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/eagle/default_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/eagle/default_config.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/eagle/eagle_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/eagle/eagle_model.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/eagle/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/eagle/utils.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/medusa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/medusa/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/medusa/conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/medusa/conversion.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/medusa/medusa_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/medusa/medusa_model.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/mode.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/plugins/megatron_eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/plugins/megatron_eagle.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/plugins/megatron_medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/plugins/megatron_medusa.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/plugins/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/plugins/transformers.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/speculative_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/speculative_decoding.py -------------------------------------------------------------------------------- /modelopt/torch/speculative/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/speculative/utils.py -------------------------------------------------------------------------------- /modelopt/torch/trace/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/trace/analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/analyzer.py -------------------------------------------------------------------------------- /modelopt/torch/trace/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/modules/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/trace/modules/concat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/modules/concat.py -------------------------------------------------------------------------------- /modelopt/torch/trace/modules/nn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/modules/nn.py -------------------------------------------------------------------------------- /modelopt/torch/trace/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/trace/plugins/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/plugins/transformers.py -------------------------------------------------------------------------------- /modelopt/torch/trace/symbols.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/symbols.py -------------------------------------------------------------------------------- /modelopt/torch/trace/tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/trace/tracer.py -------------------------------------------------------------------------------- /modelopt/torch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/utils/_pytree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/_pytree.py -------------------------------------------------------------------------------- /modelopt/torch/utils/cpp_extension.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/cpp_extension.py -------------------------------------------------------------------------------- /modelopt/torch/utils/dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/dataset_utils.py -------------------------------------------------------------------------------- /modelopt/torch/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/distributed.py -------------------------------------------------------------------------------- /modelopt/torch/utils/graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/graph.py -------------------------------------------------------------------------------- /modelopt/torch/utils/image_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/image_processor.py -------------------------------------------------------------------------------- /modelopt/torch/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/import_utils.py -------------------------------------------------------------------------------- /modelopt/torch/utils/list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/list.py -------------------------------------------------------------------------------- /modelopt/torch/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/logging.py -------------------------------------------------------------------------------- /modelopt/torch/utils/memory_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/memory_monitor.py -------------------------------------------------------------------------------- /modelopt/torch/utils/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/network.py -------------------------------------------------------------------------------- /modelopt/torch/utils/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/perf.py -------------------------------------------------------------------------------- /modelopt/torch/utils/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/plugins/__init__.py -------------------------------------------------------------------------------- /modelopt/torch/utils/plugins/megatron_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/plugins/megatron_generate.py -------------------------------------------------------------------------------- /modelopt/torch/utils/plugins/megatron_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/plugins/megatron_mmlu.py -------------------------------------------------------------------------------- /modelopt/torch/utils/plugins/megatron_preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/plugins/megatron_preprocess_data.py -------------------------------------------------------------------------------- /modelopt/torch/utils/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/random.py -------------------------------------------------------------------------------- /modelopt/torch/utils/regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/regex.py -------------------------------------------------------------------------------- /modelopt/torch/utils/speech_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/speech_dataset_utils.py -------------------------------------------------------------------------------- /modelopt/torch/utils/tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/tensor.py -------------------------------------------------------------------------------- /modelopt/torch/utils/vlm_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/modelopt/torch/utils/vlm_dataset_utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/setup.py -------------------------------------------------------------------------------- /tests/_test_utils/deploy_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/deploy_utils.py -------------------------------------------------------------------------------- /tests/_test_utils/examples/llm_ptq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/examples/llm_ptq_utils.py -------------------------------------------------------------------------------- /tests/_test_utils/examples/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/examples/models.py -------------------------------------------------------------------------------- /tests/_test_utils/examples/onnx_ptq/aggregate_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/examples/onnx_ptq/aggregate_results.py -------------------------------------------------------------------------------- /tests/_test_utils/examples/run_command.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/examples/run_command.py -------------------------------------------------------------------------------- /tests/_test_utils/import_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/import_helper.py -------------------------------------------------------------------------------- /tests/_test_utils/onnx/autocast/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/onnx/autocast/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/onnx/lib_test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/onnx/lib_test_models.py -------------------------------------------------------------------------------- /tests/_test_utils/onnx/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/onnx/quantization/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/deploy/device_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/deploy/device_model.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/deploy/lib_test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/deploy/lib_test_models.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/deploy/runtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/deploy/runtime.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/diffusers_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/diffusers_models.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/distributed/fsdp_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/distributed/fsdp_test.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/distributed/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/export/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/export/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/megatron/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/megatron/models.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/megatron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/megatron/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/misc.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/nas_prune/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/nas_prune/models.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/opt/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/opt/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/quantization/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/quantization/models.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/quantization/onnx_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/quantization/onnx_export.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/quantization/quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/quantization/quant_utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/quantization/quantize_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/quantization/quantize_common.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/quantization/tensor_quant_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/quantization/tensor_quant_common.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/quantization/tensor_quantizer_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/quantization/tensor_quantizer_common.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/sparsity/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/sparsity/utils.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/transformers_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/transformers_models.py -------------------------------------------------------------------------------- /tests/_test_utils/torch/vision_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/_test_utils/torch/vision_models.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/README.md -------------------------------------------------------------------------------- /tests/examples/cnn_qat/test_resnet50.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/cnn_qat/test_resnet50.py -------------------------------------------------------------------------------- /tests/examples/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/conftest.py -------------------------------------------------------------------------------- /tests/examples/diffusers/test_cache_diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/diffusers/test_cache_diffusion.py -------------------------------------------------------------------------------- /tests/examples/diffusers/test_diffusers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/diffusers/test_diffusers.py -------------------------------------------------------------------------------- /tests/examples/gpt_oss/test_gpt_oss_qat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/gpt_oss/test_gpt_oss_qat.py -------------------------------------------------------------------------------- /tests/examples/llm_autodeploy/test_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_autodeploy/test_llama.py -------------------------------------------------------------------------------- /tests/examples/llm_distill/test_llm_distill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_distill/test_llm_distill.py -------------------------------------------------------------------------------- /tests/examples/llm_eval/test_llm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_eval/test_llm_eval.py -------------------------------------------------------------------------------- /tests/examples/llm_ptq/test_deploy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_ptq/test_deploy.py -------------------------------------------------------------------------------- /tests/examples/llm_ptq/test_llm_ptq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_ptq/test_llm_ptq.py -------------------------------------------------------------------------------- /tests/examples/llm_qat/test_llm_qat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_qat/test_llm_qat.py -------------------------------------------------------------------------------- /tests/examples/llm_sparsity/test_llama_sparsify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/llm_sparsity/test_llama_sparsify.py -------------------------------------------------------------------------------- /tests/examples/onnx_ptq/test_llm_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/onnx_ptq/test_llm_export.py -------------------------------------------------------------------------------- /tests/examples/onnx_ptq/test_torch_quant_to_onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/onnx_ptq/test_torch_quant_to_onnx.py -------------------------------------------------------------------------------- /tests/examples/speculative_decoding/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/speculative_decoding/conftest.py -------------------------------------------------------------------------------- /tests/examples/speculative_decoding/test_eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/speculative_decoding/test_eagle.py -------------------------------------------------------------------------------- /tests/examples/speculative_decoding/test_medusa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/speculative_decoding/test_medusa.py -------------------------------------------------------------------------------- /tests/examples/test_onnx_ptq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/test_onnx_ptq.sh -------------------------------------------------------------------------------- /tests/examples/vlm_ptq/test_qwen_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/examples/vlm_ptq/test_qwen_vl.py -------------------------------------------------------------------------------- /tests/gpu/_extensions/test_onnx_extensions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/_extensions/test_onnx_extensions.py -------------------------------------------------------------------------------- /tests/gpu/_extensions/test_torch_extensions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/_extensions/test_torch_extensions.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_concat_elim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_concat_elim.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_ort_patching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_ort_patching.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_plugin.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_qdq_utils_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_qdq_utils_fp8.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_quantize_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_quantize_fp8.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_quantize_onnx_torch_int4_awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_quantize_onnx_torch_int4_awq.py -------------------------------------------------------------------------------- /tests/gpu/onnx/test_simplify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/onnx/test_simplify.py -------------------------------------------------------------------------------- /tests/gpu/torch/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/conftest.py -------------------------------------------------------------------------------- /tests/gpu/torch/deploy/_runtime/test_trt_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/deploy/_runtime/test_trt_client.py -------------------------------------------------------------------------------- /tests/gpu/torch/deploy/test_gpu_deploy_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/deploy/test_gpu_deploy_benchmark.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_export.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_export_weight_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_export_weight_gpu.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_fsdp2_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_fsdp2_export.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_quant_utils.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_unified_export_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_unified_export_megatron.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_vllm_fakequant_hf_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_vllm_fakequant_hf_export.py -------------------------------------------------------------------------------- /tests/gpu/torch/export/test_vllm_fakequant_megatron_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/export/test_vllm_fakequant_megatron_export.py -------------------------------------------------------------------------------- /tests/gpu/torch/nas/test_distributed_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/nas/test_distributed_model.py -------------------------------------------------------------------------------- /tests/gpu/torch/nas/test_search_space_with_vision_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/nas/test_search_space_with_vision_models.py -------------------------------------------------------------------------------- /tests/gpu/torch/opt/plugins/test_megatron_chaining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/opt/plugins/test_megatron_chaining.py -------------------------------------------------------------------------------- /tests/gpu/torch/opt/plugins/test_transformers_multi_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/opt/plugins/test_transformers_multi_process.py -------------------------------------------------------------------------------- /tests/gpu/torch/opt/test_fsdp_save_restore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/opt/test_fsdp_save_restore.py -------------------------------------------------------------------------------- /tests/gpu/torch/peft/test_megatron_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/peft/test_megatron_peft.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/backends/test_gemm_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/backends/test_gemm_common.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/backends/test_gemm_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/backends/test_gemm_registry.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/backends/test_nvfp4_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/backends/test_nvfp4_gemm.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/conftest.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/plugins/test_accelerate_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/plugins/test_accelerate_gpu.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/plugins/test_apex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/plugins/test_apex.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/plugins/test_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/plugins/test_megatron.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/plugins/test_transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/plugins/test_transformer_engine.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/plugins/test_transformers_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/plugins/test_transformers_tp.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_deepspeed.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_fsdp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_fsdp.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_fsdp2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_fsdp2.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_hadamard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_hadamard.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_nvfp4_onnx_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_nvfp4_onnx_export.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_onnx_export_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_onnx_export_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_qtensor_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_qtensor_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_quant_rnn_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_quant_rnn_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_quantize_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_quantize_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_quantize_mxformats_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_quantize_mxformats_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_real_quantize_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_real_quantize_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_tensor_quant_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_tensor_quant_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_tensor_quantizer_cuda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_tensor_quantizer_cuda.py -------------------------------------------------------------------------------- /tests/gpu/torch/quantization/test_torch_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/quantization/test_torch_export.py -------------------------------------------------------------------------------- /tests/gpu/torch/sparsity/weight_sparsity/test_sparse_fsdp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/sparsity/weight_sparsity/test_sparse_fsdp.py -------------------------------------------------------------------------------- /tests/gpu/torch/utils/plugins/test_utils_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/gpu/torch/utils/plugins/test_utils_megatron.py -------------------------------------------------------------------------------- /tests/unit/deploy/test_deploy_dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/deploy/test_deploy_dummy.py -------------------------------------------------------------------------------- /tests/unit/onnx/autocast/test_autocast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/autocast/test_autocast.py -------------------------------------------------------------------------------- /tests/unit/onnx/autocast/test_graphsanitizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/autocast/test_graphsanitizer.py -------------------------------------------------------------------------------- /tests/unit/onnx/autocast/test_nodeclassifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/autocast/test_nodeclassifier.py -------------------------------------------------------------------------------- /tests/unit/onnx/autocast/test_precisionconverter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/autocast/test_precisionconverter.py -------------------------------------------------------------------------------- /tests/unit/onnx/autocast/test_referencerunner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/autocast/test_referencerunner.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_convtranspose_qdq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_convtranspose_qdq.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_onnx_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_onnx_utils.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_partitioning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_partitioning.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_qdq_rules_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_qdq_rules_int8.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_qdq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_qdq_utils.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_quant_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_quant_utils.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_quantize_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_quantize_int8.py -------------------------------------------------------------------------------- /tests/unit/onnx/test_quantize_zint4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/onnx/test_quantize_zint4.py -------------------------------------------------------------------------------- /tests/unit/torch/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/conftest.py -------------------------------------------------------------------------------- /tests/unit/torch/deploy/test_cpu_deploy_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/deploy/test_cpu_deploy_benchmark.py -------------------------------------------------------------------------------- /tests/unit/torch/deploy/test_runtime_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/deploy/test_runtime_config.py -------------------------------------------------------------------------------- /tests/unit/torch/deploy/utils/test_torch_onnx_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/deploy/utils/test_torch_onnx_utils.py -------------------------------------------------------------------------------- /tests/unit/torch/distill/test_distill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/distill/test_distill.py -------------------------------------------------------------------------------- /tests/unit/torch/export/test_export_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/export/test_export_weight.py -------------------------------------------------------------------------------- /tests/unit/torch/export/test_get_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/export/test_get_quantization.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/modules/test_container.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/modules/test_container.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/modules/test_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/modules/test_conv.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/modules/test_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/modules/test_linear.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/modules/test_mod_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/modules/test_mod_utils.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/modules/test_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/modules/test_norm.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/plugins/test_dynamic_hf_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/plugins/test_dynamic_hf_attention.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/plugins/test_torch_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/plugins/test_torch_hooks.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_evaluate_constraints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_evaluate_constraints.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_full_algorithms_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_full_algorithms_vision.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_latency_interpolator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_latency_interpolator.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_nas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_nas.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_nas_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_nas_utils.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_registry.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_search_space.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_search_space.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_search_space_with_example_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_search_space_with_example_models.py -------------------------------------------------------------------------------- /tests/unit/torch/nas/test_search_space_with_torchvision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/nas/test_search_space_with_torchvision.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/plugins/test_diffusers_save_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/plugins/test_diffusers_save_load.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/plugins/test_hf_patching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/plugins/test_hf_patching.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/plugins/test_peft_save_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/plugins/test_peft_save_load.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/plugins/test_transformers_save_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/plugins/test_transformers_save_load.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/test_chaining.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/test_chaining.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/test_config.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/test_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/test_dynamic.py -------------------------------------------------------------------------------- /tests/unit/torch/opt/test_mode_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/opt/test_mode_registry.py -------------------------------------------------------------------------------- /tests/unit/torch/prune/test_algorithms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/prune/test_algorithms.py -------------------------------------------------------------------------------- /tests/unit/torch/prune/test_fastnas_conversion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/prune/test_fastnas_conversion.py -------------------------------------------------------------------------------- /tests/unit/torch/prune/test_gradnas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/prune/test_gradnas.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/plugins/test_accelerate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/plugins/test_accelerate.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/plugins/test_attention_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/plugins/test_attention_quant.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/plugins/test_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/plugins/test_huggingface.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/plugins/test_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/plugins/test_peft.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_affine_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_affine_quant.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_autoquant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_autoquant.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_calib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_calib.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_calibrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_calibrator.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_config_validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_config_validation.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_dist.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_mode.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_module_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_module_registry.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_mse_calibrator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_mse_calibrator.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_onnx_export_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_onnx_export_cpu.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_print.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_print.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_activations.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_batchnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_batchnorm.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_conv.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_instancenorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_instancenorm.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_linear.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_pooling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_pooling.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quant_rnn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quant_rnn.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quantize_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quantize_cpu.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_quantize_replace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_quantize_replace.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_tensor_quant_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_tensor_quant_cpu.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_tensor_quantizer_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_tensor_quantizer_cpu.py -------------------------------------------------------------------------------- /tests/unit/torch/quantization/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/quantization/test_utils.py -------------------------------------------------------------------------------- /tests/unit/torch/sparsity/weight_sparsity/test_sparsify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/sparsity/weight_sparsity/test_sparsify.py -------------------------------------------------------------------------------- /tests/unit/torch/speculative/plugins/test_hf_speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/speculative/plugins/test_hf_speculative.py -------------------------------------------------------------------------------- /tests/unit/torch/trace/test_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/trace/test_analyzer.py -------------------------------------------------------------------------------- /tests/unit/torch/trace/test_concat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/trace/test_concat.py -------------------------------------------------------------------------------- /tests/unit/torch/trace/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/trace/test_model.py -------------------------------------------------------------------------------- /tests/unit/torch/trace/test_nn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/trace/test_nn.py -------------------------------------------------------------------------------- /tests/unit/torch/trace/test_symbol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/trace/test_symbol.py -------------------------------------------------------------------------------- /tests/unit/torch/trace/test_tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/trace/test_tracer.py -------------------------------------------------------------------------------- /tests/unit/torch/utils/test_dataset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/utils/test_dataset_utils.py -------------------------------------------------------------------------------- /tests/unit/torch/utils/test_megatron_preprocess_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/utils/test_megatron_preprocess_data.py -------------------------------------------------------------------------------- /tests/unit/torch/utils/test_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/utils/test_network.py -------------------------------------------------------------------------------- /tests/unit/torch/utils/test_pytree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/utils/test_pytree.py -------------------------------------------------------------------------------- /tests/unit/torch/utils/test_regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/utils/test_regex.py -------------------------------------------------------------------------------- /tests/unit/torch/utils/test_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tests/unit/torch/utils/test_tensor.py -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/TensorRT-Model-Optimizer/HEAD/tox.ini --------------------------------------------------------------------------------