├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ └── feature-request.yml ├── PULL_REQUEST_TEMPLATE.md ├── actions │ ├── install_neuronx_runtime │ │ └── action.yml │ ├── install_optimum_neuron │ │ └── action.yml │ └── prepare_venv │ │ └── action.yml └── workflows │ ├── build-ami.yml │ ├── build-vllm-image.yml │ ├── cache_diffusion.yml │ ├── cache_llm.yml │ ├── check_code_quality.yml │ ├── disabled │ └── precompile_tests.yml │ ├── doc-build.yml │ ├── doc-pr-build.yml │ ├── doc-pr-upload.yml │ ├── security.yml │ ├── stale.yaml │ ├── test_cpu_compilation.yml │ ├── test_cpu_lookup.yml │ ├── test_inf2_diffusers.yml │ ├── test_inf2_export.yml │ ├── test_inf2_llm.yml │ ├── test_inf2_seq2seq.yml │ ├── test_inf2_slow.yml │ ├── test_inf2_transformers.yml │ ├── test_inf2_vllm.yml │ ├── test_sagemaker.yml │ └── test_trainium_training.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── benchmark └── vllm │ ├── README.md │ ├── accuracy.sh │ ├── data-parallel │ ├── README.md │ ├── llama3-70B-trn2 │ │ ├── .env │ │ ├── docker-compose.yaml │ │ └── nginx.conf │ ├── llama3.1-8b │ │ ├── .env │ │ ├── docker-compose-dp3.yaml │ │ ├── docker-compose-dp4.yaml │ │ ├── nginx-dp3.conf │ │ ├── nginx-dp4.conf │ │ ├── vllm-results-dp3.csv │ │ └── vllm-results-dp4.csv │ └── qwen3-30B-A3B │ │ ├── .env │ │ ├── docker-compose.yaml │ │ └── nginx.conf │ ├── generate_csv.py │ ├── performance.sh │ └── single-instance │ ├── README.md │ ├── llama-3.1-8b-trn2 │ ├── .env │ └── vllm-results.csv │ ├── llama4-Maverick-trn2 │ ├── .env │ └── vllm-results.csv │ ├── llama4-Scout-trn2 │ ├── .env │ └── vllm-results.csv │ ├── llama4-Scout │ ├── .env │ └── vllm-results.csv │ ├── qwen3-235B-A22B-trn2 │ ├── .env │ └── vllm-results.csv │ ├── qwen3-30B-A3B-trn2 │ ├── .env │ └── vllm-results.csv │ ├── qwen3-30B-A3B │ ├── .env │ └── vllm-results.csv │ ├── qwen3-32B-trn2 │ ├── .env │ └── vllm-results.csv │ └── serve.sh ├── docker └── vllm │ └── Dockerfile ├── docs ├── README.md ├── assets │ ├── benchmarks │ │ ├── inferentia-llama3.1-8b │ │ │ ├── latency.png │ │ │ ├── throughput.png │ │ │ └── ttft.png │ │ └── inferentia-llama3.3-70b │ │ │ ├── latency.png │ │ │ ├── throughput.png │ │ │ └── ttft.png │ └── guides │ │ ├── models │ │ ├── 01-sd-image.png │ │ ├── 02-sdxl-image.jpeg │ │ └── 03-sd-lora.png │ │ └── setup_aws_instance │ │ ├── 01-name-instance.png │ │ ├── 02-search-ami.png │ │ ├── 03-select-ami.png │ │ ├── 04-select-key.png │ │ ├── 05-select-sg.png │ │ ├── 06-launch-instance.png │ │ └── 07-copy-dns.png └── source │ ├── _toctree.yml │ ├── benchmarks │ ├── inferentia-llama3.1-8b.mdx │ └── inferentia-llama3.3-70b.mdx │ ├── containers.mdx │ ├── contribute │ ├── contribute_for_inference.mdx │ ├── contribute_for_training.mdx │ └── dev_environment.mdx │ ├── ec2-setup.mdx │ ├── guides │ ├── benchmark.mdx │ ├── cache_system.mdx │ ├── distributed_training.mdx │ ├── export_model.mdx │ ├── neuronx_tgi.mdx │ ├── pipelines.mdx │ └── vllm_plugin.mdx │ ├── index.mdx │ ├── inference_tutorials │ ├── llama2-13b-chatbot.mdx │ ├── notebooks.mdx │ └── sentence_transformers.mdx │ ├── model_doc │ ├── diffusers │ │ ├── controlnet.mdx │ │ ├── flux.mdx │ │ ├── ip_adapter.mdx │ │ ├── lcm.mdx │ │ ├── lora.mdx │ │ ├── pix2pix.mdx │ │ ├── pixart_alpha.mdx │ │ ├── pixart_sigma.mdx │ │ ├── sdxl_turbo.mdx │ │ ├── stable_diffusion.mdx │ │ └── stable_diffusion_xl.mdx │ ├── modeling_auto.mdx │ ├── sentence_transformers │ │ └── overview.mdx │ └── transformers │ │ ├── bert.mdx │ │ ├── clip.mdx │ │ ├── whisper.mdx │ │ └── yolos.mdx │ ├── quickstart.mdx │ ├── supported_architectures.mdx │ ├── training_api │ ├── lora.mdx │ ├── trainer.mdx │ ├── transformations.mdx │ └── trl_trainers.mdx │ └── training_tutorials │ ├── amazon_eks │ ├── Dockerfile │ ├── generate-jobspec.sh │ └── llama3_train.yaml-template │ ├── finetune_llama.mdx │ ├── finetune_llms_overview.mdx │ ├── finetune_qwen3.mdx │ └── pretraining_hyperpod_llm.mdx ├── examples ├── inference │ └── text-generation │ │ └── generation.py └── training │ ├── llama │ ├── README.md │ ├── finetune_llama.py │ └── finetune_llama.sh │ └── qwen3 │ ├── README.md │ ├── finetune_qwen3.py │ └── finetune_qwen3.sh ├── infrastructure └── ami │ ├── README.md │ ├── hcl2-files │ ├── build.pkr.hcl │ ├── packer.pkr.hcl │ ├── sources.pkr.hcl │ └── variables.pkr.hcl │ └── scripts │ ├── install-huggingface-libraries.sh │ ├── validate-neuron.sh │ └── welcome-msg.sh ├── notebooks ├── README.md ├── sagemaker │ ├── deploy-llama-3-3-70b.ipynb │ └── deploy-mixtral-8x7b.ipynb ├── sentence-transformers │ └── getting-started.ipynb ├── stable-diffusion │ ├── stable-diffusion-txt2img.ipynb │ └── stable-diffusion-xl-txt2img.ipynb ├── text-classification │ ├── fine_tune_bert.ipynb │ └── scripts │ │ └── train.py └── text-generation │ ├── CodeLlama-7B-Compilation.ipynb │ ├── llama2-13b-chatbot.ipynb │ ├── llama2-7b-fine-tuning.ipynb │ └── scripts │ ├── run_clm.py │ ├── sft_finetuning_qwen3.py │ ├── sft_finetuning_qwen3.sh │ └── utils │ └── pack_dataset.py ├── optimum ├── commands │ ├── env.py │ ├── export │ │ ├── neuron.py │ │ └── neuronx.py │ ├── neuron │ │ ├── base.py │ │ ├── cache.py │ │ ├── serve.py │ │ └── subcommands.py │ └── register │ │ ├── register_export.py │ │ └── register_neuron.py ├── exporters │ └── neuron │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── base.py │ │ ├── config.py │ │ ├── convert.py │ │ ├── model_configs.py │ │ ├── model_wrappers.py │ │ └── utils.py └── neuron │ ├── __init__.py │ ├── accelerate │ ├── __init__.py │ ├── accelerator.py │ ├── optimizer.py │ ├── scheduler.py │ ├── state.py │ └── utils │ │ ├── __init__.py │ │ ├── dataclasses.py │ │ ├── misc.py │ │ └── operations.py │ ├── cache │ ├── __init__.py │ ├── entries │ │ ├── cache_entry.py │ │ ├── multi_model.py │ │ └── single_model.py │ ├── hub_cache.py │ ├── optimum_neuron_cc_wrapper.py │ ├── traced.py │ └── training.py │ ├── configuration_utils.py │ ├── generation │ ├── __init__.py │ ├── logits_process.py │ ├── token_selector.py │ └── utils.py │ ├── hf_argparser.py │ ├── modeling.py │ ├── modeling_base.py │ ├── modeling_diffusion.py │ ├── modeling_sentence_transformers.py │ ├── modeling_seq2seq.py │ ├── modeling_traced.py │ ├── models │ ├── __init__.py │ ├── auto_model.py │ ├── inference │ │ ├── auto_models.py │ │ ├── backend │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── graph_builder.py │ │ │ ├── model_wrapper.py │ │ │ ├── modules │ │ │ │ ├── attention │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── attention_base.py │ │ │ │ │ ├── gqa.py │ │ │ │ │ ├── rope.py │ │ │ │ │ └── utils.py │ │ │ │ ├── checkpoint.py │ │ │ │ ├── decoder │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── decoder_builders.py │ │ │ │ │ ├── decoder_wrappers.py │ │ │ │ │ └── modeling_decoder.py │ │ │ │ ├── generation │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── generation_utils.py │ │ │ │ │ └── sampling.py │ │ │ │ ├── kvcache │ │ │ │ │ ├── kv_cache_manager.py │ │ │ │ │ └── utils.py │ │ │ │ ├── moe.py │ │ │ │ ├── moe_v2.py │ │ │ │ └── rms_norm.py │ │ │ ├── pretrained_model.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── distributed.py │ │ │ │ ├── layer_boundary_marker.py │ │ │ │ └── random.py │ │ ├── bert │ │ │ ├── __init__.py │ │ │ └── modeling_bert.py │ │ ├── clip │ │ │ ├── __init__.py │ │ │ └── modeling_clip.py │ │ ├── flux │ │ │ ├── flux_transformer_2d │ │ │ │ ├── modeling_flux_transformer_2d.py │ │ │ │ └── modules │ │ │ │ │ ├── activations.py │ │ │ │ │ ├── embeddings.py │ │ │ │ │ ├── kernels.py │ │ │ │ │ └── normalization.py │ │ │ └── t5 │ │ │ │ └── modeling_t5.py │ │ ├── granite │ │ │ └── modeling_granite.py │ │ ├── llama │ │ │ └── modeling_llama.py │ │ ├── llama4 │ │ │ └── modeling_llama4.py │ │ ├── mixtral │ │ │ └── modeling_mixtral.py │ │ ├── modeling_utils.py │ │ ├── phi3 │ │ │ └── modeling_phi3.py │ │ ├── qwen2 │ │ │ └── modeling_qwen2.py │ │ ├── qwen3 │ │ │ └── modeling_qwen3.py │ │ ├── qwen3_moe │ │ │ └── modeling_qwen3_moe.py │ │ ├── smollm3 │ │ │ └── modeling_smollm3.py │ │ ├── t5 │ │ │ └── modeling_t5.py │ │ ├── whisper │ │ │ ├── __init__.py │ │ │ └── modeling_whisper.py │ │ └── yolos │ │ │ ├── __init__.py │ │ │ └── modeling_yolos.py │ ├── neuron_config.py │ └── training │ │ ├── __init__.py │ │ ├── auto_models.py │ │ ├── checkpointing.py │ │ ├── config.py │ │ ├── granite │ │ ├── __init__.py │ │ └── modeling_granite.py │ │ ├── llama │ │ └── modeling_llama.py │ │ ├── loss_utils.py │ │ ├── masking_utils.py │ │ ├── modeling_auto.py │ │ ├── modeling_utils.py │ │ ├── pipeline_utils.py │ │ ├── qwen3 │ │ ├── __init__.py │ │ └── modeling_qwen3.py │ │ ├── training_utils.py │ │ └── transformations_utils.py │ ├── peft │ ├── __init__.py │ ├── mapping.py │ ├── mapping_func.py │ ├── peft_model.py │ ├── tuners │ │ ├── __init__.py │ │ └── lora │ │ │ ├── __init__.py │ │ │ ├── layer.py │ │ │ └── model.py │ └── utils │ │ ├── __init__.py │ │ └── save_and_load.py │ ├── pipelines │ ├── __init__.py │ ├── diffusers │ │ ├── __init__.py │ │ ├── pipeline_controlnet.py │ │ ├── pipeline_controlnet_sd_xl.py │ │ └── pipeline_utils.py │ └── transformers │ │ ├── __init__.py │ │ ├── base.py │ │ └── sentence_transformers.py │ ├── trainers │ ├── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── base.py │ │ ├── collector.py │ │ ├── constants.py │ │ ├── efficiency.py │ │ ├── mfu.py │ │ ├── registry.py │ │ ├── throughput.py │ │ ├── timing.py │ │ └── window.py │ ├── sft_config.py │ ├── sft_trainer.py │ ├── training_args.py │ ├── transformers.py │ ├── trl_utils.py │ └── utils.py │ ├── utils │ ├── __init__.py │ ├── argument_utils.py │ ├── cache_utils.py │ ├── constant.py │ ├── deprecate_utils.py │ ├── doc.py │ ├── ecr.py │ ├── import_utils.py │ ├── input_generators.py │ ├── instance.py │ ├── misc.py │ ├── model_utils.py │ ├── neuron_cc_wrapper │ ├── neuron_device_memory.py │ ├── neuron_parallel_compile.py │ ├── optimization_utils.py │ ├── patching.py │ ├── require_utils.py │ ├── runner.py │ ├── system.py │ ├── testing_utils.py │ ├── torch_xla_and_neuronx_initialization.py │ └── version_utils.py │ ├── version.py │ └── vllm │ ├── __init__.py │ ├── model_loader.py │ ├── platform.py │ ├── plugin.py │ ├── runner.py │ └── worker.py ├── pyproject.toml ├── tests ├── conftest.py ├── decoder │ ├── nxd_testing.py │ ├── test_attention.py │ ├── test_cache.py │ ├── test_cli.py │ ├── test_decoder_config.py │ ├── test_decoder_embedding.py │ ├── test_decoder_export.py │ ├── test_decoder_generation.py │ ├── test_decoder_hub.py │ ├── test_decoder_pipelines.py │ ├── test_device_memory.py │ ├── test_fused_logits_warper.py │ └── test_modules.py ├── exporters │ ├── __init__.py │ ├── exporters_utils.py │ ├── test_cpu_compilation.py │ ├── test_diffusers.py │ └── test_transformers.py ├── fixtures │ └── llm │ │ ├── export_models.py │ │ ├── vllm_docker_service.py │ │ └── vllm_service.py ├── inference │ ├── __init__.py │ ├── cache_utils.py │ ├── conftest.py │ ├── diffusers │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_cache.py │ │ ├── test_export_cli.py │ │ ├── test_flux.py │ │ ├── test_pixart.py │ │ └── test_stable_diffusion.py │ ├── inference_utils.py │ ├── seq2seq │ │ ├── conftest.py │ │ ├── test_export.py │ │ ├── test_export_cli.py │ │ ├── test_generate.py │ │ ├── test_hub.py │ │ └── test_parallel.py │ └── transformers │ │ ├── __init__.py │ │ ├── test_cache.py │ │ ├── test_export_cli.py │ │ └── test_modeling.py ├── pipelines │ ├── conftest.py │ └── test_encoder_pipelines.py ├── pytest.ini ├── sagemaker │ └── test_images_uri.py ├── training │ ├── README.md │ ├── __init__.py │ ├── distributed_utils.py │ ├── synchronize_with_cache_repo_for_ci.py │ ├── test_checkpointing.py │ ├── test_custom_modeling.py │ ├── test_distributed_utils.py │ ├── test_flash_attn.py │ ├── test_linears.py │ ├── test_metrics.py │ ├── test_mixed_precision.py │ ├── test_modeling_auto.py │ ├── test_neuron_sft_trainer.py │ ├── test_neuron_trainer.py │ ├── test_optimizer.py │ ├── test_overfit.py │ ├── test_zero1.py │ └── utils.py └── vllm │ ├── docker │ └── test_vllm_docker_service_generate.py │ ├── engine │ └── test_vllm_engine_generate.py │ └── service │ ├── test_vllm_agentic.py │ ├── test_vllm_model_config.py │ └── test_vllm_service_generate.py └── tools ├── cache ├── auto_fill_diffusion_cache.py └── auto_fill_llm_cache.py ├── decode_hlos.py ├── list_top_models.py └── prune_test_models.py /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/ISSUE_TEMPLATE/bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/ISSUE_TEMPLATE/config.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/ISSUE_TEMPLATE/feature-request.yml -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/actions/install_neuronx_runtime/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/actions/install_neuronx_runtime/action.yml -------------------------------------------------------------------------------- /.github/actions/install_optimum_neuron/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/actions/install_optimum_neuron/action.yml -------------------------------------------------------------------------------- /.github/actions/prepare_venv/action.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/actions/prepare_venv/action.yml -------------------------------------------------------------------------------- /.github/workflows/build-ami.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/build-ami.yml -------------------------------------------------------------------------------- /.github/workflows/build-vllm-image.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/build-vllm-image.yml -------------------------------------------------------------------------------- /.github/workflows/cache_diffusion.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/cache_diffusion.yml -------------------------------------------------------------------------------- /.github/workflows/cache_llm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/cache_llm.yml -------------------------------------------------------------------------------- /.github/workflows/check_code_quality.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/check_code_quality.yml -------------------------------------------------------------------------------- /.github/workflows/disabled/precompile_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/disabled/precompile_tests.yml -------------------------------------------------------------------------------- /.github/workflows/doc-build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/doc-build.yml -------------------------------------------------------------------------------- /.github/workflows/doc-pr-build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/doc-pr-build.yml -------------------------------------------------------------------------------- /.github/workflows/doc-pr-upload.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/doc-pr-upload.yml -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/security.yml -------------------------------------------------------------------------------- /.github/workflows/stale.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/stale.yaml -------------------------------------------------------------------------------- /.github/workflows/test_cpu_compilation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_cpu_compilation.yml -------------------------------------------------------------------------------- /.github/workflows/test_cpu_lookup.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_cpu_lookup.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_diffusers.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_diffusers.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_export.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_export.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_llm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_llm.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_seq2seq.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_seq2seq.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_slow.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_slow.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_transformers.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_transformers.yml -------------------------------------------------------------------------------- /.github/workflows/test_inf2_vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_inf2_vllm.yml -------------------------------------------------------------------------------- /.github/workflows/test_sagemaker.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_sagemaker.yml -------------------------------------------------------------------------------- /.github/workflows/test_trainium_training.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.github/workflows/test_trainium_training.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/README.md -------------------------------------------------------------------------------- /benchmark/vllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/README.md -------------------------------------------------------------------------------- /benchmark/vllm/accuracy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/accuracy.sh -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/README.md -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3-70B-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3-70B-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3-70B-trn2/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3-70B-trn2/docker-compose.yaml -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3-70B-trn2/nginx.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3-70B-trn2/nginx.conf -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/.env -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/docker-compose-dp3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/docker-compose-dp3.yaml -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/docker-compose-dp4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/docker-compose-dp4.yaml -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/nginx-dp3.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/nginx-dp3.conf -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/nginx-dp4.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/nginx-dp4.conf -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/vllm-results-dp3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/vllm-results-dp3.csv -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/llama3.1-8b/vllm-results-dp4.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/llama3.1-8b/vllm-results-dp4.csv -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/qwen3-30B-A3B/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/qwen3-30B-A3B/.env -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/qwen3-30B-A3B/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/qwen3-30B-A3B/docker-compose.yaml -------------------------------------------------------------------------------- /benchmark/vllm/data-parallel/qwen3-30B-A3B/nginx.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/data-parallel/qwen3-30B-A3B/nginx.conf -------------------------------------------------------------------------------- /benchmark/vllm/generate_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/generate_csv.py -------------------------------------------------------------------------------- /benchmark/vllm/performance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/performance.sh -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/README.md -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama-3.1-8b-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama-3.1-8b-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama-3.1-8b-trn2/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama-3.1-8b-trn2/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama4-Maverick-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama4-Maverick-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama4-Maverick-trn2/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama4-Maverick-trn2/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama4-Scout-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama4-Scout-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama4-Scout-trn2/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama4-Scout-trn2/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama4-Scout/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama4-Scout/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/llama4-Scout/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/llama4-Scout/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-235B-A22B-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-235B-A22B-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-235B-A22B-trn2/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-235B-A22B-trn2/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-30B-A3B-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-30B-A3B-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-30B-A3B-trn2/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-30B-A3B-trn2/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-30B-A3B/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-30B-A3B/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-30B-A3B/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-30B-A3B/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-32B-trn2/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-32B-trn2/.env -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/qwen3-32B-trn2/vllm-results.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/qwen3-32B-trn2/vllm-results.csv -------------------------------------------------------------------------------- /benchmark/vllm/single-instance/serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/benchmark/vllm/single-instance/serve.sh -------------------------------------------------------------------------------- /docker/vllm/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docker/vllm/Dockerfile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/assets/benchmarks/inferentia-llama3.1-8b/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/benchmarks/inferentia-llama3.1-8b/latency.png -------------------------------------------------------------------------------- /docs/assets/benchmarks/inferentia-llama3.1-8b/throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/benchmarks/inferentia-llama3.1-8b/throughput.png -------------------------------------------------------------------------------- /docs/assets/benchmarks/inferentia-llama3.1-8b/ttft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/benchmarks/inferentia-llama3.1-8b/ttft.png -------------------------------------------------------------------------------- /docs/assets/benchmarks/inferentia-llama3.3-70b/latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/benchmarks/inferentia-llama3.3-70b/latency.png -------------------------------------------------------------------------------- /docs/assets/benchmarks/inferentia-llama3.3-70b/throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/benchmarks/inferentia-llama3.3-70b/throughput.png -------------------------------------------------------------------------------- /docs/assets/benchmarks/inferentia-llama3.3-70b/ttft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/benchmarks/inferentia-llama3.3-70b/ttft.png -------------------------------------------------------------------------------- /docs/assets/guides/models/01-sd-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/models/01-sd-image.png -------------------------------------------------------------------------------- /docs/assets/guides/models/02-sdxl-image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/models/02-sdxl-image.jpeg -------------------------------------------------------------------------------- /docs/assets/guides/models/03-sd-lora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/models/03-sd-lora.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/01-name-instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/01-name-instance.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/02-search-ami.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/02-search-ami.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/03-select-ami.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/03-select-ami.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/04-select-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/04-select-key.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/05-select-sg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/05-select-sg.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/06-launch-instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/06-launch-instance.png -------------------------------------------------------------------------------- /docs/assets/guides/setup_aws_instance/07-copy-dns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/assets/guides/setup_aws_instance/07-copy-dns.png -------------------------------------------------------------------------------- /docs/source/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/_toctree.yml -------------------------------------------------------------------------------- /docs/source/benchmarks/inferentia-llama3.1-8b.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/benchmarks/inferentia-llama3.1-8b.mdx -------------------------------------------------------------------------------- /docs/source/benchmarks/inferentia-llama3.3-70b.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/benchmarks/inferentia-llama3.3-70b.mdx -------------------------------------------------------------------------------- /docs/source/containers.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/containers.mdx -------------------------------------------------------------------------------- /docs/source/contribute/contribute_for_inference.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/contribute/contribute_for_inference.mdx -------------------------------------------------------------------------------- /docs/source/contribute/contribute_for_training.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/contribute/contribute_for_training.mdx -------------------------------------------------------------------------------- /docs/source/contribute/dev_environment.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/contribute/dev_environment.mdx -------------------------------------------------------------------------------- /docs/source/ec2-setup.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/ec2-setup.mdx -------------------------------------------------------------------------------- /docs/source/guides/benchmark.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/benchmark.mdx -------------------------------------------------------------------------------- /docs/source/guides/cache_system.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/cache_system.mdx -------------------------------------------------------------------------------- /docs/source/guides/distributed_training.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/distributed_training.mdx -------------------------------------------------------------------------------- /docs/source/guides/export_model.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/export_model.mdx -------------------------------------------------------------------------------- /docs/source/guides/neuronx_tgi.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/neuronx_tgi.mdx -------------------------------------------------------------------------------- /docs/source/guides/pipelines.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/pipelines.mdx -------------------------------------------------------------------------------- /docs/source/guides/vllm_plugin.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/guides/vllm_plugin.mdx -------------------------------------------------------------------------------- /docs/source/index.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/index.mdx -------------------------------------------------------------------------------- /docs/source/inference_tutorials/llama2-13b-chatbot.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/inference_tutorials/llama2-13b-chatbot.mdx -------------------------------------------------------------------------------- /docs/source/inference_tutorials/notebooks.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/inference_tutorials/notebooks.mdx -------------------------------------------------------------------------------- /docs/source/inference_tutorials/sentence_transformers.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/inference_tutorials/sentence_transformers.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/controlnet.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/controlnet.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/flux.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/flux.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/ip_adapter.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/ip_adapter.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/lcm.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/lcm.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/lora.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/lora.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/pix2pix.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/pix2pix.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/pixart_alpha.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/pixart_alpha.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/pixart_sigma.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/pixart_sigma.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/sdxl_turbo.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/sdxl_turbo.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/stable_diffusion.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/stable_diffusion.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/diffusers/stable_diffusion_xl.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/diffusers/stable_diffusion_xl.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/modeling_auto.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/modeling_auto.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/sentence_transformers/overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/sentence_transformers/overview.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/transformers/bert.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/transformers/bert.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/transformers/clip.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/transformers/clip.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/transformers/whisper.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/transformers/whisper.mdx -------------------------------------------------------------------------------- /docs/source/model_doc/transformers/yolos.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/model_doc/transformers/yolos.mdx -------------------------------------------------------------------------------- /docs/source/quickstart.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/quickstart.mdx -------------------------------------------------------------------------------- /docs/source/supported_architectures.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/supported_architectures.mdx -------------------------------------------------------------------------------- /docs/source/training_api/lora.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_api/lora.mdx -------------------------------------------------------------------------------- /docs/source/training_api/trainer.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_api/trainer.mdx -------------------------------------------------------------------------------- /docs/source/training_api/transformations.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_api/transformations.mdx -------------------------------------------------------------------------------- /docs/source/training_api/trl_trainers.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_api/trl_trainers.mdx -------------------------------------------------------------------------------- /docs/source/training_tutorials/amazon_eks/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/amazon_eks/Dockerfile -------------------------------------------------------------------------------- /docs/source/training_tutorials/amazon_eks/generate-jobspec.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/amazon_eks/generate-jobspec.sh -------------------------------------------------------------------------------- /docs/source/training_tutorials/amazon_eks/llama3_train.yaml-template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/amazon_eks/llama3_train.yaml-template -------------------------------------------------------------------------------- /docs/source/training_tutorials/finetune_llama.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/finetune_llama.mdx -------------------------------------------------------------------------------- /docs/source/training_tutorials/finetune_llms_overview.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/finetune_llms_overview.mdx -------------------------------------------------------------------------------- /docs/source/training_tutorials/finetune_qwen3.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/finetune_qwen3.mdx -------------------------------------------------------------------------------- /docs/source/training_tutorials/pretraining_hyperpod_llm.mdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/docs/source/training_tutorials/pretraining_hyperpod_llm.mdx -------------------------------------------------------------------------------- /examples/inference/text-generation/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/inference/text-generation/generation.py -------------------------------------------------------------------------------- /examples/training/llama/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/training/llama/README.md -------------------------------------------------------------------------------- /examples/training/llama/finetune_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/training/llama/finetune_llama.py -------------------------------------------------------------------------------- /examples/training/llama/finetune_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/training/llama/finetune_llama.sh -------------------------------------------------------------------------------- /examples/training/qwen3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/training/qwen3/README.md -------------------------------------------------------------------------------- /examples/training/qwen3/finetune_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/training/qwen3/finetune_qwen3.py -------------------------------------------------------------------------------- /examples/training/qwen3/finetune_qwen3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/examples/training/qwen3/finetune_qwen3.sh -------------------------------------------------------------------------------- /infrastructure/ami/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/README.md -------------------------------------------------------------------------------- /infrastructure/ami/hcl2-files/build.pkr.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/hcl2-files/build.pkr.hcl -------------------------------------------------------------------------------- /infrastructure/ami/hcl2-files/packer.pkr.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/hcl2-files/packer.pkr.hcl -------------------------------------------------------------------------------- /infrastructure/ami/hcl2-files/sources.pkr.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/hcl2-files/sources.pkr.hcl -------------------------------------------------------------------------------- /infrastructure/ami/hcl2-files/variables.pkr.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/hcl2-files/variables.pkr.hcl -------------------------------------------------------------------------------- /infrastructure/ami/scripts/install-huggingface-libraries.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/scripts/install-huggingface-libraries.sh -------------------------------------------------------------------------------- /infrastructure/ami/scripts/validate-neuron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/scripts/validate-neuron.sh -------------------------------------------------------------------------------- /infrastructure/ami/scripts/welcome-msg.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/infrastructure/ami/scripts/welcome-msg.sh -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/README.md -------------------------------------------------------------------------------- /notebooks/sagemaker/deploy-llama-3-3-70b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/sagemaker/deploy-llama-3-3-70b.ipynb -------------------------------------------------------------------------------- /notebooks/sagemaker/deploy-mixtral-8x7b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/sagemaker/deploy-mixtral-8x7b.ipynb -------------------------------------------------------------------------------- /notebooks/sentence-transformers/getting-started.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/sentence-transformers/getting-started.ipynb -------------------------------------------------------------------------------- /notebooks/stable-diffusion/stable-diffusion-txt2img.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/stable-diffusion/stable-diffusion-txt2img.ipynb -------------------------------------------------------------------------------- /notebooks/stable-diffusion/stable-diffusion-xl-txt2img.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/stable-diffusion/stable-diffusion-xl-txt2img.ipynb -------------------------------------------------------------------------------- /notebooks/text-classification/fine_tune_bert.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-classification/fine_tune_bert.ipynb -------------------------------------------------------------------------------- /notebooks/text-classification/scripts/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-classification/scripts/train.py -------------------------------------------------------------------------------- /notebooks/text-generation/CodeLlama-7B-Compilation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/CodeLlama-7B-Compilation.ipynb -------------------------------------------------------------------------------- /notebooks/text-generation/llama2-13b-chatbot.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/llama2-13b-chatbot.ipynb -------------------------------------------------------------------------------- /notebooks/text-generation/llama2-7b-fine-tuning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/llama2-7b-fine-tuning.ipynb -------------------------------------------------------------------------------- /notebooks/text-generation/scripts/run_clm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/scripts/run_clm.py -------------------------------------------------------------------------------- /notebooks/text-generation/scripts/sft_finetuning_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/scripts/sft_finetuning_qwen3.py -------------------------------------------------------------------------------- /notebooks/text-generation/scripts/sft_finetuning_qwen3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/scripts/sft_finetuning_qwen3.sh -------------------------------------------------------------------------------- /notebooks/text-generation/scripts/utils/pack_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/notebooks/text-generation/scripts/utils/pack_dataset.py -------------------------------------------------------------------------------- /optimum/commands/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/env.py -------------------------------------------------------------------------------- /optimum/commands/export/neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/export/neuron.py -------------------------------------------------------------------------------- /optimum/commands/export/neuronx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/export/neuronx.py -------------------------------------------------------------------------------- /optimum/commands/neuron/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/neuron/base.py -------------------------------------------------------------------------------- /optimum/commands/neuron/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/neuron/cache.py -------------------------------------------------------------------------------- /optimum/commands/neuron/serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/neuron/serve.py -------------------------------------------------------------------------------- /optimum/commands/neuron/subcommands.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/neuron/subcommands.py -------------------------------------------------------------------------------- /optimum/commands/register/register_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/register/register_export.py -------------------------------------------------------------------------------- /optimum/commands/register/register_neuron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/commands/register/register_neuron.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/__init__.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/__main__.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/base.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/config.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/convert.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/model_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/model_configs.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/model_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/model_wrappers.py -------------------------------------------------------------------------------- /optimum/exporters/neuron/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/exporters/neuron/utils.py -------------------------------------------------------------------------------- /optimum/neuron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/accelerator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/accelerator.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/optimizer.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/scheduler.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/state.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/utils/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/utils/dataclasses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/utils/dataclasses.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/utils/misc.py -------------------------------------------------------------------------------- /optimum/neuron/accelerate/utils/operations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/accelerate/utils/operations.py -------------------------------------------------------------------------------- /optimum/neuron/cache/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/cache/entries/cache_entry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/entries/cache_entry.py -------------------------------------------------------------------------------- /optimum/neuron/cache/entries/multi_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/entries/multi_model.py -------------------------------------------------------------------------------- /optimum/neuron/cache/entries/single_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/entries/single_model.py -------------------------------------------------------------------------------- /optimum/neuron/cache/hub_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/hub_cache.py -------------------------------------------------------------------------------- /optimum/neuron/cache/optimum_neuron_cc_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/optimum_neuron_cc_wrapper.py -------------------------------------------------------------------------------- /optimum/neuron/cache/traced.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/traced.py -------------------------------------------------------------------------------- /optimum/neuron/cache/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/cache/training.py -------------------------------------------------------------------------------- /optimum/neuron/configuration_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/configuration_utils.py -------------------------------------------------------------------------------- /optimum/neuron/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/generation/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/generation/logits_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/generation/logits_process.py -------------------------------------------------------------------------------- /optimum/neuron/generation/token_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/generation/token_selector.py -------------------------------------------------------------------------------- /optimum/neuron/generation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/generation/utils.py -------------------------------------------------------------------------------- /optimum/neuron/hf_argparser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/hf_argparser.py -------------------------------------------------------------------------------- /optimum/neuron/modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/modeling.py -------------------------------------------------------------------------------- /optimum/neuron/modeling_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/modeling_base.py -------------------------------------------------------------------------------- /optimum/neuron/modeling_diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/modeling_diffusion.py -------------------------------------------------------------------------------- /optimum/neuron/modeling_sentence_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/modeling_sentence_transformers.py -------------------------------------------------------------------------------- /optimum/neuron/modeling_seq2seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/modeling_seq2seq.py -------------------------------------------------------------------------------- /optimum/neuron/modeling_traced.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/modeling_traced.py -------------------------------------------------------------------------------- /optimum/neuron/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/auto_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/auto_model.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/auto_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/auto_models.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/config.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/graph_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/graph_builder.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/model_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/model_wrapper.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/attention/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/attention/attention_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/attention/attention_base.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/attention/gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/attention/gqa.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/attention/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/attention/rope.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/attention/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/attention/utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/checkpoint.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/decoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/decoder/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/decoder/decoder_builders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/decoder/decoder_builders.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/decoder/decoder_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/decoder/decoder_wrappers.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/decoder/modeling_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/decoder/modeling_decoder.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/generation/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/generation/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/generation/generation_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/generation/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/generation/sampling.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/kvcache/kv_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/kvcache/kv_cache_manager.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/kvcache/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/kvcache/utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/moe.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/moe_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/moe_v2.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/modules/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/modules/rms_norm.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/pretrained_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/pretrained_model.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/utils/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/utils/distributed.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/utils/layer_boundary_marker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/utils/layer_boundary_marker.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/backend/utils/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/backend/utils/random.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/bert/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/bert/modeling_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/bert/modeling_bert.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/clip/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/clip/modeling_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/clip/modeling_clip.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/flux/flux_transformer_2d/modeling_flux_transformer_2d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/flux/flux_transformer_2d/modeling_flux_transformer_2d.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/flux/flux_transformer_2d/modules/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/flux/flux_transformer_2d/modules/activations.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/flux/flux_transformer_2d/modules/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/flux/flux_transformer_2d/modules/embeddings.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/flux/flux_transformer_2d/modules/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/flux/flux_transformer_2d/modules/kernels.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/flux/flux_transformer_2d/modules/normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/flux/flux_transformer_2d/modules/normalization.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/flux/t5/modeling_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/flux/t5/modeling_t5.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/granite/modeling_granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/granite/modeling_granite.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/llama/modeling_llama.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/llama4/modeling_llama4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/llama4/modeling_llama4.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/mixtral/modeling_mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/mixtral/modeling_mixtral.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/modeling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/modeling_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/phi3/modeling_phi3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/phi3/modeling_phi3.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/qwen2/modeling_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/qwen2/modeling_qwen2.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/qwen3/modeling_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/qwen3/modeling_qwen3.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/qwen3_moe/modeling_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/qwen3_moe/modeling_qwen3_moe.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/smollm3/modeling_smollm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/smollm3/modeling_smollm3.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/t5/modeling_t5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/t5/modeling_t5.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/whisper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/whisper/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/whisper/modeling_whisper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/whisper/modeling_whisper.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/yolos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/yolos/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/inference/yolos/modeling_yolos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/inference/yolos/modeling_yolos.py -------------------------------------------------------------------------------- /optimum/neuron/models/neuron_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/neuron_config.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/auto_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/auto_models.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/checkpointing.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/config.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/granite/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /optimum/neuron/models/training/granite/modeling_granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/granite/modeling_granite.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/llama/modeling_llama.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/loss_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/loss_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/masking_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/masking_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/modeling_auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/modeling_auto.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/modeling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/modeling_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/pipeline_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/pipeline_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/qwen3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /optimum/neuron/models/training/qwen3/modeling_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/qwen3/modeling_qwen3.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/training_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/training_utils.py -------------------------------------------------------------------------------- /optimum/neuron/models/training/transformations_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/models/training/transformations_utils.py -------------------------------------------------------------------------------- /optimum/neuron/peft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/peft/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/mapping.py -------------------------------------------------------------------------------- /optimum/neuron/peft/mapping_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/mapping_func.py -------------------------------------------------------------------------------- /optimum/neuron/peft/peft_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/peft_model.py -------------------------------------------------------------------------------- /optimum/neuron/peft/tuners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/tuners/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/peft/tuners/lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/tuners/lora/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/peft/tuners/lora/layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/tuners/lora/layer.py -------------------------------------------------------------------------------- /optimum/neuron/peft/tuners/lora/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/tuners/lora/model.py -------------------------------------------------------------------------------- /optimum/neuron/peft/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /optimum/neuron/peft/utils/save_and_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/peft/utils/save_and_load.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/diffusers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/diffusers/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/diffusers/pipeline_controlnet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/diffusers/pipeline_controlnet.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/diffusers/pipeline_controlnet_sd_xl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/diffusers/pipeline_controlnet_sd_xl.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/diffusers/pipeline_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/diffusers/pipeline_utils.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/transformers/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/transformers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/transformers/base.py -------------------------------------------------------------------------------- /optimum/neuron/pipelines/transformers/sentence_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/pipelines/transformers/sentence_transformers.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/base.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/collector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/collector.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/constants.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/efficiency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/efficiency.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/mfu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/mfu.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/registry.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/throughput.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/timing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/timing.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/metrics/window.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/metrics/window.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/sft_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/sft_config.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/sft_trainer.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/training_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/training_args.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/transformers.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/trl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/trl_utils.py -------------------------------------------------------------------------------- /optimum/neuron/trainers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/trainers/utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/utils/argument_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/argument_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/cache_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/constant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/constant.py -------------------------------------------------------------------------------- /optimum/neuron/utils/deprecate_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/deprecate_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/doc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/doc.py -------------------------------------------------------------------------------- /optimum/neuron/utils/ecr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/ecr.py -------------------------------------------------------------------------------- /optimum/neuron/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/import_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/input_generators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/input_generators.py -------------------------------------------------------------------------------- /optimum/neuron/utils/instance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/instance.py -------------------------------------------------------------------------------- /optimum/neuron/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/misc.py -------------------------------------------------------------------------------- /optimum/neuron/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/model_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/neuron_cc_wrapper: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/neuron_cc_wrapper -------------------------------------------------------------------------------- /optimum/neuron/utils/neuron_device_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/neuron_device_memory.py -------------------------------------------------------------------------------- /optimum/neuron/utils/neuron_parallel_compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/neuron_parallel_compile.py -------------------------------------------------------------------------------- /optimum/neuron/utils/optimization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/optimization_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/patching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/patching.py -------------------------------------------------------------------------------- /optimum/neuron/utils/require_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/require_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/runner.py -------------------------------------------------------------------------------- /optimum/neuron/utils/system.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/system.py -------------------------------------------------------------------------------- /optimum/neuron/utils/testing_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/testing_utils.py -------------------------------------------------------------------------------- /optimum/neuron/utils/torch_xla_and_neuronx_initialization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/torch_xla_and_neuronx_initialization.py -------------------------------------------------------------------------------- /optimum/neuron/utils/version_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/utils/version_utils.py -------------------------------------------------------------------------------- /optimum/neuron/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/version.py -------------------------------------------------------------------------------- /optimum/neuron/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/vllm/__init__.py -------------------------------------------------------------------------------- /optimum/neuron/vllm/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/vllm/model_loader.py -------------------------------------------------------------------------------- /optimum/neuron/vllm/platform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/vllm/platform.py -------------------------------------------------------------------------------- /optimum/neuron/vllm/plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/vllm/plugin.py -------------------------------------------------------------------------------- /optimum/neuron/vllm/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/vllm/runner.py -------------------------------------------------------------------------------- /optimum/neuron/vllm/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/optimum/neuron/vllm/worker.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/pyproject.toml -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/decoder/nxd_testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/nxd_testing.py -------------------------------------------------------------------------------- /tests/decoder/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_attention.py -------------------------------------------------------------------------------- /tests/decoder/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_cache.py -------------------------------------------------------------------------------- /tests/decoder/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_cli.py -------------------------------------------------------------------------------- /tests/decoder/test_decoder_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_decoder_config.py -------------------------------------------------------------------------------- /tests/decoder/test_decoder_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_decoder_embedding.py -------------------------------------------------------------------------------- /tests/decoder/test_decoder_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_decoder_export.py -------------------------------------------------------------------------------- /tests/decoder/test_decoder_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_decoder_generation.py -------------------------------------------------------------------------------- /tests/decoder/test_decoder_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_decoder_hub.py -------------------------------------------------------------------------------- /tests/decoder/test_decoder_pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_decoder_pipelines.py -------------------------------------------------------------------------------- /tests/decoder/test_device_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_device_memory.py -------------------------------------------------------------------------------- /tests/decoder/test_fused_logits_warper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_fused_logits_warper.py -------------------------------------------------------------------------------- /tests/decoder/test_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/decoder/test_modules.py -------------------------------------------------------------------------------- /tests/exporters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/exporters/__init__.py -------------------------------------------------------------------------------- /tests/exporters/exporters_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/exporters/exporters_utils.py -------------------------------------------------------------------------------- /tests/exporters/test_cpu_compilation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/exporters/test_cpu_compilation.py -------------------------------------------------------------------------------- /tests/exporters/test_diffusers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/exporters/test_diffusers.py -------------------------------------------------------------------------------- /tests/exporters/test_transformers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/exporters/test_transformers.py -------------------------------------------------------------------------------- /tests/fixtures/llm/export_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/fixtures/llm/export_models.py -------------------------------------------------------------------------------- /tests/fixtures/llm/vllm_docker_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/fixtures/llm/vllm_docker_service.py -------------------------------------------------------------------------------- /tests/fixtures/llm/vllm_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/fixtures/llm/vllm_service.py -------------------------------------------------------------------------------- /tests/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/__init__.py -------------------------------------------------------------------------------- /tests/inference/cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/cache_utils.py -------------------------------------------------------------------------------- /tests/inference/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/conftest.py -------------------------------------------------------------------------------- /tests/inference/diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/inference/diffusers/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/diffusers/conftest.py -------------------------------------------------------------------------------- /tests/inference/diffusers/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/diffusers/test_cache.py -------------------------------------------------------------------------------- /tests/inference/diffusers/test_export_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/diffusers/test_export_cli.py -------------------------------------------------------------------------------- /tests/inference/diffusers/test_flux.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/diffusers/test_flux.py -------------------------------------------------------------------------------- /tests/inference/diffusers/test_pixart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/diffusers/test_pixart.py -------------------------------------------------------------------------------- /tests/inference/diffusers/test_stable_diffusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/diffusers/test_stable_diffusion.py -------------------------------------------------------------------------------- /tests/inference/inference_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/inference_utils.py -------------------------------------------------------------------------------- /tests/inference/seq2seq/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/seq2seq/conftest.py -------------------------------------------------------------------------------- /tests/inference/seq2seq/test_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/seq2seq/test_export.py -------------------------------------------------------------------------------- /tests/inference/seq2seq/test_export_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/seq2seq/test_export_cli.py -------------------------------------------------------------------------------- /tests/inference/seq2seq/test_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/seq2seq/test_generate.py -------------------------------------------------------------------------------- /tests/inference/seq2seq/test_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/seq2seq/test_hub.py -------------------------------------------------------------------------------- /tests/inference/seq2seq/test_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/seq2seq/test_parallel.py -------------------------------------------------------------------------------- /tests/inference/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/transformers/__init__.py -------------------------------------------------------------------------------- /tests/inference/transformers/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/transformers/test_cache.py -------------------------------------------------------------------------------- /tests/inference/transformers/test_export_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/transformers/test_export_cli.py -------------------------------------------------------------------------------- /tests/inference/transformers/test_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/inference/transformers/test_modeling.py -------------------------------------------------------------------------------- /tests/pipelines/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/pipelines/conftest.py -------------------------------------------------------------------------------- /tests/pipelines/test_encoder_pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/pipelines/test_encoder_pipelines.py -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_mode = auto 3 | -------------------------------------------------------------------------------- /tests/sagemaker/test_images_uri.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/sagemaker/test_images_uri.py -------------------------------------------------------------------------------- /tests/training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/README.md -------------------------------------------------------------------------------- /tests/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/__init__.py -------------------------------------------------------------------------------- /tests/training/distributed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/distributed_utils.py -------------------------------------------------------------------------------- /tests/training/synchronize_with_cache_repo_for_ci.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/synchronize_with_cache_repo_for_ci.py -------------------------------------------------------------------------------- /tests/training/test_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_checkpointing.py -------------------------------------------------------------------------------- /tests/training/test_custom_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_custom_modeling.py -------------------------------------------------------------------------------- /tests/training/test_distributed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_distributed_utils.py -------------------------------------------------------------------------------- /tests/training/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_flash_attn.py -------------------------------------------------------------------------------- /tests/training/test_linears.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_linears.py -------------------------------------------------------------------------------- /tests/training/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_metrics.py -------------------------------------------------------------------------------- /tests/training/test_mixed_precision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_mixed_precision.py -------------------------------------------------------------------------------- /tests/training/test_modeling_auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_modeling_auto.py -------------------------------------------------------------------------------- /tests/training/test_neuron_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_neuron_sft_trainer.py -------------------------------------------------------------------------------- /tests/training/test_neuron_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_neuron_trainer.py -------------------------------------------------------------------------------- /tests/training/test_optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_optimizer.py -------------------------------------------------------------------------------- /tests/training/test_overfit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_overfit.py -------------------------------------------------------------------------------- /tests/training/test_zero1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/test_zero1.py -------------------------------------------------------------------------------- /tests/training/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/training/utils.py -------------------------------------------------------------------------------- /tests/vllm/docker/test_vllm_docker_service_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/vllm/docker/test_vllm_docker_service_generate.py -------------------------------------------------------------------------------- /tests/vllm/engine/test_vllm_engine_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/vllm/engine/test_vllm_engine_generate.py -------------------------------------------------------------------------------- /tests/vllm/service/test_vllm_agentic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/vllm/service/test_vllm_agentic.py -------------------------------------------------------------------------------- /tests/vllm/service/test_vllm_model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/vllm/service/test_vllm_model_config.py -------------------------------------------------------------------------------- /tests/vllm/service/test_vllm_service_generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tests/vllm/service/test_vllm_service_generate.py -------------------------------------------------------------------------------- /tools/cache/auto_fill_diffusion_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tools/cache/auto_fill_diffusion_cache.py -------------------------------------------------------------------------------- /tools/cache/auto_fill_llm_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tools/cache/auto_fill_llm_cache.py -------------------------------------------------------------------------------- /tools/decode_hlos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tools/decode_hlos.py -------------------------------------------------------------------------------- /tools/list_top_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tools/list_top_models.py -------------------------------------------------------------------------------- /tools/prune_test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum-neuron/HEAD/tools/prune_test_models.py --------------------------------------------------------------------------------