├── .gemini └── config.yaml ├── .github ├── Dockerfile.buildwheel ├── Dockerfile.nightly.a2 ├── Dockerfile.nightly.a3 ├── ISSUE_TEMPLATE │ ├── 100-documentation.yml │ ├── 110-user-story.yml │ ├── 200-installation.yml │ ├── 300-usage.yml │ ├── 400-bug-report.yml │ ├── 500-feature-request.yml │ ├── 600-new-model.yml │ ├── 700-performance-discussion.yml │ ├── 750-RFC.yml │ ├── 800-others.yml │ ├── 900-release-checklist.yml │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md ├── actionlint.yaml ├── dependabot.yml ├── format_pr_body.sh ├── labeler.yml └── workflows │ ├── _e2e_nightly_multi_node.yaml │ ├── _e2e_nightly_single_node.yaml │ ├── _e2e_nightly_single_node_models.yaml │ ├── _e2e_test.yaml │ ├── _nightly_image_build.yaml │ ├── format_pr_body.yaml │ ├── image_310p_openeuler.yml │ ├── image_310p_ubuntu.yml │ ├── image_a3_openeuler.yml │ ├── image_a3_ubuntu.yml │ ├── image_openeuler.yml │ ├── image_ubuntu.yml │ ├── label_merge_conflict.yml │ ├── labeler.yml │ ├── matchers │ ├── actionlint.json │ ├── mypy.json │ └── ruff.json │ ├── nightly_benchmarks.yaml │ ├── pre-commit.yml │ ├── release_code.yml │ ├── release_whl.yml │ ├── reminder_comment.yml │ ├── vllm_ascend_doctest.yaml │ ├── vllm_ascend_test_310p.yaml │ ├── vllm_ascend_test_full_vllm_main.yaml │ ├── vllm_ascend_test_nightly_a2.yaml │ ├── vllm_ascend_test_nightly_a3.yaml │ ├── vllm_ascend_test_pr_full.yaml │ ├── vllm_ascend_test_pr_light.yaml │ └── vllm_ascend_test_report.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DCO ├── Dockerfile ├── Dockerfile.310p ├── Dockerfile.310p.openEuler ├── Dockerfile.a3 ├── Dockerfile.a3.openEuler ├── Dockerfile.openEuler ├── LICENSE ├── README.md ├── README.zh.md ├── benchmarks ├── README.md ├── ops │ └── ben_vocabparallelembedding.py ├── requirements-bench.txt ├── scripts │ ├── convert_json_to_markdown.py │ ├── perf_result_template.md │ └── run-performance-benchmarks.sh └── tests │ ├── latency-tests.json │ ├── serving-tests.json │ └── throughput-tests.json ├── cmake └── utils.cmake ├── codecov.yml ├── collect_env.py ├── csrc ├── CMakeLists.txt ├── aclnn_torch_adapter │ ├── NPUBridge.cpp │ ├── NPUBridge.h │ ├── NPUStorageImpl.cpp │ ├── NPUStorageImpl.h │ └── op_api_common.h ├── batch_matmul_transpose │ ├── op_host │ │ ├── batch_matmul_transpose.h │ │ ├── common.h │ │ ├── common_tiling.h │ │ └── tiling │ │ │ ├── tiling_data.cpp │ │ │ └── tiling_data.h │ └── op_kernel │ │ └── batch_matmul_transpose_kernel.cpp ├── build.sh ├── build_aclnn.sh ├── camem_allocator.cpp ├── cmake │ ├── config.cmake │ ├── func.cmake │ ├── intf.cmake │ ├── intf_pub.cmake │ ├── modules │ │ └── Findalog.cmake │ └── scripts │ │ └── prepare.sh ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list │ ├── op_host │ │ ├── CMakeLists.txt │ │ ├── aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp │ │ ├── aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list.h │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp │ │ └── grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h │ └── op_kernel │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list.h │ │ ├── grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h │ │ └── grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h ├── kernels │ ├── bgmv_expand.cpp │ ├── bgmv_shrink.cpp │ ├── get_masked_input_and_mask_kernel.cpp │ ├── math_utils.h │ ├── pos_encoding_kernels.cpp │ ├── sgmv_expand.cpp │ ├── sgmv_shrink.cpp │ ├── types.h │ └── utils.h ├── lightning_indexer │ ├── op_host │ │ ├── CMakeLists.txt │ │ ├── lightning_indexer_def.cpp │ │ ├── lightning_indexer_proto.cpp │ │ ├── lightning_indexer_tiling.cpp │ │ └── lightning_indexer_tiling.h │ └── op_kernel │ │ ├── lightning_indexer.cpp │ │ ├── lightning_indexer_common.h │ │ ├── lightning_indexer_kernel.h │ │ ├── lightning_indexer_service_cube.h │ │ ├── lightning_indexer_service_vector.h │ │ ├── lightning_indexer_template_tiling_key.h │ │ └── lightning_indexer_vector.h ├── mla_preprocess │ ├── op_host │ │ ├── mla_preprocess.h │ │ └── tiling │ │ │ └── mla_preprocess_tiling.h │ └── op_kernel │ │ ├── kernel │ │ ├── common.h │ │ ├── common_func.h │ │ ├── hardware.h │ │ ├── iterator.h │ │ ├── iterators │ │ │ ├── gm_to_l1_iterator.inc │ │ │ ├── gm_to_ub_iterator.inc │ │ │ ├── l0c_to_gm_iterator.inc │ │ │ ├── l0c_to_l1_iterator.inc │ │ │ ├── l0c_to_ub_iterator.inc │ │ │ ├── l1_to_bt_iterator.inc │ │ │ ├── l1_to_fb_iterator.inc │ │ │ ├── l1_to_l0_iterator.inc │ │ │ └── l1_to_ub_iterator.inc │ │ ├── kernel_utils.h │ │ ├── layout.h │ │ ├── mem.h │ │ ├── mma.h │ │ ├── set_fpc.h │ │ ├── simd.h │ │ └── utils.h │ │ ├── mla_preprocess.h │ │ ├── mla_preprocess_kernel.cpp │ │ ├── mla_preprocess_mix_bf16.hpp │ │ └── mla_preprocess_mix_fp16.hpp ├── ops.h ├── sparse_flash_attention │ ├── op_host │ │ ├── CMakeLists.txt │ │ ├── sparse_flash_attention_def.cpp │ │ ├── sparse_flash_attention_proto.cpp │ │ ├── sparse_flash_attention_tiling.cpp │ │ └── sparse_flash_attention_tiling.h │ └── op_kernel │ │ ├── sparse_flash_attention.cpp │ │ ├── sparse_flash_attention_common.h │ │ ├── sparse_flash_attention_kernel_mla.h │ │ ├── sparse_flash_attention_service_cube_mla.h │ │ ├── sparse_flash_attention_service_vector_mla.h │ │ └── sparse_flash_attention_template_tiling_key.h ├── torch_binding.cpp ├── torch_binding_meta.cpp ├── utils.h └── utils │ ├── CMakeLists.txt │ ├── inc │ ├── aclnn_util.h │ ├── error │ │ └── ops_error.h │ ├── fallback.h │ ├── fallback_comm.h │ ├── kernel │ │ ├── dropmask.h │ │ ├── pse.h │ │ └── util.h │ ├── log │ │ ├── inner │ │ │ └── dfx_base.h │ │ └── ops_log.h │ └── tiling │ │ ├── data_copy_transpose_tiling.h │ │ ├── data_copy_transpose_tiling_def.h │ │ ├── tiling_base.h │ │ ├── tiling_templates_registry.h │ │ └── tiling_type.h │ └── src │ └── fallback_comm.cpp ├── docs ├── Makefile ├── README.md ├── requirements-docs.txt ├── requirements-test.txt └── source │ ├── _templates │ └── sections │ │ └── header.html │ ├── assets │ ├── deployment.png │ ├── disaggregated_prefill_pull.png │ ├── disaggregated_prefill_push.png │ ├── eplb.png │ ├── multi_node_dp_deepseek.png │ ├── multi_node_dp_kimi.png │ └── workflow.png │ ├── community │ ├── contributors.md │ ├── governance.md │ ├── user_stories │ │ ├── index.md │ │ └── llamafactory.md │ └── versioning_policy.md │ ├── conf.py │ ├── developer_guide │ ├── contribution │ │ ├── index.md │ │ ├── multi_node_test.md │ │ └── testing.md │ ├── evaluation │ │ ├── accuracy_report │ │ │ ├── DeepSeek-V2-Lite.md │ │ │ ├── Qwen2.5-VL-7B-Instruct.md │ │ │ ├── Qwen3-30B-A3B.md │ │ │ ├── Qwen3-8B-Base.md │ │ │ └── index.md │ │ ├── index.md │ │ ├── using_ais_bench.md │ │ ├── using_evalscope.md │ │ ├── using_lm_eval.md │ │ └── using_opencompass.md │ ├── feature_guide │ │ ├── ACL_Graph.md │ │ ├── KV_Cache_Pool_Guide.md │ │ ├── ModelRunner_prepare_inputs.md │ │ ├── Multi_Token_Prediction.md │ │ ├── add_custom_aclnn_op.md │ │ ├── disaggregated_prefill.md │ │ ├── eplb_swift_balancer.md │ │ ├── index.md │ │ └── patch.md │ └── performance_and_debug │ │ ├── index.md │ │ ├── msprobe_guide.md │ │ ├── optimization_and_tuning.md │ │ ├── performance_benchmark.md │ │ ├── profile_execute_duration.md │ │ └── service_profiling_guide.md │ ├── faqs.md │ ├── index.md │ ├── installation.md │ ├── locale │ └── zh_CN │ │ └── LC_MESSAGES │ │ ├── community │ │ ├── contributors.po │ │ ├── governance.po │ │ ├── user_stories │ │ │ ├── index.po │ │ │ └── llamafactory.po │ │ └── versioning_policy.po │ │ ├── developer_guide │ │ ├── contribution │ │ │ ├── index.po │ │ │ └── testing.po │ │ ├── evaluation │ │ │ ├── accuracy_report │ │ │ │ └── index.po │ │ │ ├── index.po │ │ │ ├── using_evalscope.po │ │ │ ├── using_lm_eval.po │ │ │ └── using_opencompass.po │ │ ├── feature_guide │ │ │ ├── index.po │ │ │ └── patch.po │ │ ├── modeling │ │ │ ├── adding_a_new_model.po │ │ │ ├── adding_a_new_multimodal_model.po │ │ │ └── index.po │ │ └── performance_and_debug │ │ │ ├── index.po │ │ │ ├── msprobe_guide.po │ │ │ ├── performance_benchmark.po │ │ │ ├── profile_execute_duration.po │ │ │ └── service_profiling_guide.po │ │ ├── faqs.po │ │ ├── index.po │ │ ├── installation.po │ │ ├── quick_start.po │ │ ├── tutorials │ │ ├── index.po │ │ ├── multi_node.po │ │ ├── multi_npu.po │ │ ├── multi_npu_moge.po │ │ ├── multi_npu_quantization.po │ │ ├── multi_npu_qwen3_moe.po │ │ ├── single_node_300i.po │ │ ├── single_npu.po │ │ ├── single_npu_audio.po │ │ ├── single_npu_multimodal.po │ │ └── single_npu_qwen3_embedding.po │ │ └── user_guide │ │ ├── configuration │ │ ├── additional_config.po │ │ ├── env_vars.po │ │ └── index.po │ │ ├── feature_guide │ │ ├── graph_mode.po │ │ ├── index.po │ │ ├── lora.po │ │ ├── quantization.po │ │ ├── sleep_mode.po │ │ └── structured_output.po │ │ ├── release_notes.po │ │ └── support_matrix │ │ ├── index.po │ │ ├── supported_features.po │ │ └── supported_models.po │ ├── logos │ ├── vllm-ascend-logo-text-dark.png │ └── vllm-ascend-logo-text-light.png │ ├── quick_start.md │ ├── tutorials │ ├── DeepSeek-V3.1.md │ ├── DeepSeek-V3.2-Exp.md │ ├── Qwen2.5-Omni.md │ ├── Qwen3-Coder-30B-A3B.md │ ├── index.md │ ├── multi_node.md │ ├── multi_node_kimi.md │ ├── multi_node_pd_disaggregation_mooncake.md │ ├── multi_node_qwen3vl.md │ ├── multi_node_ray.md │ ├── multi_npu.md │ ├── multi_npu_moge.md │ ├── multi_npu_quantization.md │ ├── multi_npu_qwen3_moe.md │ ├── multi_npu_qwen3_next.md │ ├── single_node_300i.md │ ├── single_node_pd_disaggregation_llmdatadist.md │ ├── single_npu.md │ ├── single_npu_qwen2.5_vl.md │ ├── single_npu_qwen2_audio.md │ ├── single_npu_qwen3_embedding.md │ ├── single_npu_qwen3_quantization.md │ └── single_npu_qwen3_w4a4.md │ └── user_guide │ ├── configuration │ ├── additional_config.md │ ├── env_vars.md │ └── index.md │ ├── feature_guide │ ├── dynamic_batch.md │ ├── eplb_swift_balancer.md │ ├── external_dp.md │ ├── graph_mode.md │ ├── images │ │ ├── eplb_img.png │ │ ├── netloader_flowchart.png │ │ ├── netloader_timing_diagram.png │ │ └── structured_output_1.png │ ├── index.md │ ├── kv_pool.md │ ├── lora.md │ ├── netloader.md │ ├── quantization-llm-compressor.md │ ├── quantization.md │ ├── sleep_mode.md │ └── structured_output.md │ ├── release_notes.md │ └── support_matrix │ ├── index.md │ ├── supported_features.md │ └── supported_models.md ├── examples ├── chat_templates │ └── template_qwen2_audio.jinja ├── disaggregated_prefill_v1 │ ├── README.md │ ├── gen_ranktable.py │ ├── gen_ranktable.sh │ ├── load_balance_proxy_layerwise_server_example.py │ ├── load_balance_proxy_server_example.py │ ├── mooncake_connector_deployment_guide.md │ └── run_server.sh ├── eplb │ ├── eplb_deepseek.py │ └── eplb_strategy.py ├── external_online_dp │ ├── README.md │ ├── dp_load_balance_proxy_server.py │ ├── launch_online_dp.py │ └── run_dp_template.sh ├── offline_data_parallel.py ├── offline_disaggregated_prefill_npu.py ├── offline_embed.py ├── offline_external_launcher.py ├── offline_inference_audio_language.py ├── offline_inference_npu.py ├── offline_inference_npu_long_seq.py ├── offline_inference_npu_tp2.py ├── offline_inference_sleep_mode_npu.py ├── offline_weight_load.py ├── prompt_embed_inference.py ├── prompt_embedding_inference.py ├── quantization │ └── llm-compressor │ │ ├── w8a8_int8.py │ │ └── w8a8_int8_dynamic.py └── run_dp_server.sh ├── format.sh ├── mypy.ini ├── packages.txt ├── pyproject.toml ├── requirements-dev.txt ├── requirements-lint.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── e2e │ ├── 310p │ │ ├── test_offline_inference_310p.py │ │ └── test_offline_inference_parallel_310p.py │ ├── __init__.py │ ├── common.sh │ ├── conftest.py │ ├── doctests │ │ ├── 001-quickstart-test.sh │ │ └── 002-pip-binary-installation-test.sh │ ├── model_utils.py │ ├── models │ │ ├── configs │ │ │ ├── DeepSeek-V2-Lite.yaml │ │ │ ├── ERNIE-4.5-21B-A3B-PT.yaml │ │ │ ├── InternVL3_5-8B-hf.yaml │ │ │ ├── Meta-Llama-3.1-8B-Instruct.yaml │ │ │ ├── Molmo-7B-D-0924.yaml │ │ │ ├── Qwen2-Audio-7B-Instruct.yaml │ │ │ ├── Qwen2-VL-7B-Instruct.yaml │ │ │ ├── Qwen2.5-Omni-7B.yaml │ │ │ ├── Qwen2.5-VL-7B-Instruct.yaml │ │ │ ├── Qwen3-30B-A3B-W8A8.yaml │ │ │ ├── Qwen3-30B-A3B.yaml │ │ │ ├── Qwen3-8B-Base.yaml │ │ │ ├── Qwen3-8B-W8A8.yaml │ │ │ ├── Qwen3-8B.yaml │ │ │ ├── Qwen3-Next-80B-A3B-Instruct.yaml │ │ │ ├── Qwen3-VL-30B-A3B-Instruct.yaml │ │ │ ├── Qwen3-VL-8B-Instruct.yaml │ │ │ ├── accuracy.txt │ │ │ ├── gemma-2-9b-it.yaml │ │ │ ├── gemma-3-4b-it.yaml │ │ │ ├── internlm-7b.yaml │ │ │ └── llava-1.5-7b-hf.yaml │ │ ├── conftest.py │ │ ├── report_template.md │ │ └── test_lm_eval_correctness.py │ ├── multicard │ │ ├── test_aclgraph_capture_replay.py │ │ ├── test_chunk_gated_delta_rule.py │ │ ├── test_data_parallel.py │ │ ├── test_data_parallel_tp2.py │ │ ├── test_expert_parallel.py │ │ ├── test_external_launcher.py │ │ ├── test_full_graph_mode.py │ │ ├── test_fused_moe_allgather_ep.py │ │ ├── test_ilama_lora_tp2.py │ │ ├── test_offline_inference_distributed.py │ │ ├── test_pipeline_parallel.py │ │ ├── test_prefix_caching.py │ │ ├── test_quantization.py │ │ ├── test_qwen3_moe.py │ │ ├── test_qwen3_next.py │ │ ├── test_shared_expert_dp.py │ │ ├── test_single_request_aclgraph.py │ │ ├── test_torchair_graph_mode.py │ │ └── test_weight_loader.py │ ├── nightly │ │ ├── features │ │ │ ├── test_mtpx_deepseek_r1_0528_w8a8.py │ │ │ ├── test_prefix_cache_deepseek_r1_0528_w8a8.py │ │ │ ├── test_prefix_cache_qwen3_32b_int8.py │ │ │ └── test_qwen3_32b_int8_a3_feature_stack3.py │ │ ├── models │ │ │ ├── test_deepseek_r1_0528_w8a8.py │ │ │ ├── test_deepseek_r1_w8a8_eplb.py │ │ │ ├── test_deepseek_v3_2_exp_w8a8.py │ │ │ ├── test_glm4_5.py │ │ │ ├── test_qwen2_5_vl_32b.py │ │ │ ├── test_qwen2_5_vl_7b.py │ │ │ ├── test_qwen3_235b_a22b_w8a8_eplb.py │ │ │ ├── test_qwen3_235b_w8a8.py │ │ │ ├── test_qwen3_30b_w8a8.py │ │ │ ├── test_qwen3_32b.py │ │ │ ├── test_qwen3_32b_int8.py │ │ │ └── test_qwq_32b.py │ │ ├── multi_node │ │ │ ├── __init__.py │ │ │ ├── config │ │ │ │ ├── __init__.py │ │ │ │ ├── models │ │ │ │ │ ├── DeepSeek-R1-W8A8-A2-torchair.yaml │ │ │ │ │ ├── DeepSeek-R1-W8A8-A2.yaml │ │ │ │ │ ├── DeepSeek-R1-W8A8-EPLB.yaml │ │ │ │ │ ├── DeepSeek-R1-W8A8.yaml │ │ │ │ │ ├── DeepSeek-V3.yaml │ │ │ │ │ ├── DeepSeek-V3_2-Exp-bf16.yaml │ │ │ │ │ ├── GLM-4_5.yaml │ │ │ │ │ ├── Qwen3-235B-A3B.yaml │ │ │ │ │ ├── Qwen3-235B-W8A8-EPLB.yaml │ │ │ │ │ └── Qwen3-235B-W8A8.yaml │ │ │ │ ├── multi_node_config.py │ │ │ │ └── utils.py │ │ │ ├── scripts │ │ │ │ ├── lws.yaml.jinja2 │ │ │ │ └── run.sh │ │ │ └── test_multi_node.py │ │ └── ops │ │ │ ├── __init__.py │ │ │ ├── test_batch_matmul_transpose.py │ │ │ ├── test_bgmv_expand.py │ │ │ ├── test_bgmv_shrink.py │ │ │ ├── test_fused_moe.py │ │ │ ├── test_gating_top_k_softmax.py │ │ │ ├── test_gmm_swiglu_quant_weight_nz_tensor_list.py │ │ │ ├── test_grouped_matmul_swiglu_quant.py │ │ │ ├── test_mla_preprocess.py │ │ │ ├── test_rotary_embedding.py │ │ │ ├── test_vocabparallelembedding.py │ │ │ └── triton │ │ │ ├── __init__.py │ │ │ └── test_rope.py │ ├── pd_disaggreate │ │ ├── run_edge_case_test.sh │ │ ├── setup_pd.sh │ │ ├── test_edge_cases.py │ │ └── test_pd_e2e.py │ ├── prompts │ │ └── example.txt │ ├── run_disagg_pd.sh │ ├── run_doctests.sh │ ├── singlecard │ │ ├── __init__.py │ │ ├── multi-modal │ │ │ └── test_internvl.py │ │ ├── spec_decode_v1 │ │ │ ├── test_v1_mtp_correctness.py │ │ │ ├── test_v1_mtp_torchair_correctness.py │ │ │ └── test_v1_spec_decode.py │ │ ├── test_aclgraph.py │ │ ├── test_aclgraph_mem.py │ │ ├── test_bge_model.py │ │ ├── test_camem.py │ │ ├── test_completion_with_prompt_embeds.py │ │ ├── test_embedding.py │ │ ├── test_embedding_aclgraph.py │ │ ├── test_guided_decoding.py │ │ ├── test_ilama_lora.py │ │ ├── test_multistream_overlap_shared_expert.py │ │ ├── test_profile_execute_duration.py │ │ ├── test_quantization.py │ │ ├── test_sampler.py │ │ └── test_vlm.py │ ├── utils.py │ └── vllm_interface │ │ ├── singlecard │ │ └── test_sampler.py │ │ └── vllm_test.cfg └── ut │ ├── __init__.py │ ├── attention │ ├── test_attention_mask.py │ ├── test_attention_v1.py │ ├── test_mla_v1.py │ └── test_sfa_v1.py │ ├── base.py │ ├── compilation │ └── test_acl_graph.py │ ├── conftest.py │ ├── core │ ├── test_schedule_config.py │ └── test_scheduler.py │ ├── device_allocator │ └── test_camem.py │ ├── distributed │ ├── device_communicators │ │ ├── test_pyhccl.py │ │ └── test_pyhccl_wrapper.py │ ├── mooncake │ │ └── test_config_data.py │ ├── test_communicator.py │ ├── test_determin_expert_map_all.py │ └── test_parallel_state.py │ ├── eplb │ ├── adaptor │ │ └── test_abstract_adaptor.py │ └── core │ │ ├── policy │ │ ├── test_policy_abstract.py │ │ ├── test_policy_dynamic_ep.py │ │ ├── test_policy_dynamic_ep_v2.py │ │ └── test_policy_factor.py │ │ ├── test_eplb_device_transfer_loader.py │ │ └── test_eplb_utils.py │ ├── fake_weight │ └── config.json │ ├── kv_connector │ ├── test_llmdatadist_connector.py │ ├── test_mooncake_connector.py │ ├── test_mooncake_layerwise_connector.py │ ├── test_remote_decode_lifecycle.py │ ├── test_remote_prefill_lifecycle.py │ └── utils.py │ ├── model_loader │ └── netloader │ │ ├── test_netloader.py │ │ ├── test_netloader_elastic.py │ │ ├── test_netloader_load.py │ │ └── test_netloader_utils.py │ ├── models │ ├── __init__.py │ ├── conftest.py │ └── test_mla.py │ ├── ops │ ├── expert_map.json │ ├── test_activation.py │ ├── test_comm_utils.py │ ├── test_expert_load_balancer.py │ ├── test_fused_moe.py │ ├── test_layernorm.py │ ├── test_linear.py │ ├── test_moe_comm_method.py │ ├── test_prepare_finalize.py │ ├── test_rotary_embedding.py │ ├── test_token_dispatcher.py │ └── test_vocab_parallel_embedding.py │ ├── patch │ └── worker │ │ └── patch_common │ │ ├── test_patch_distributed.py │ │ └── test_patch_minicpm.py │ ├── quantization │ ├── test_quant_config.py │ ├── test_utils.py │ ├── test_w4a4_flatquant_dynamic.py │ ├── test_w4a8_dynamic.py │ ├── test_w8a8.py │ └── test_w8a8_dynamic.py │ ├── sample │ ├── logits_processor │ │ └── test_builtin.py │ ├── test_rejection_sampler.py │ └── test_sampler.py │ ├── spec_decode │ └── test_eagle_proposer.py │ ├── test_ascend_config.py │ ├── test_envs.py │ ├── test_platform.py │ ├── test_utils.py │ ├── torchair │ ├── __init__.py │ ├── models │ │ ├── test_qwen3_moe.py │ │ ├── test_torchair_deepseek_mtp.py │ │ └── test_torchair_deepseek_v2.py │ ├── ops │ │ ├── test_torchair_fused_moe.py │ │ └── test_torchair_rotary_embedding.py │ ├── quantization │ │ ├── test_torchair_w4a8_dynamic.py │ │ └── test_torchair_w8a8_dynamic.py │ ├── test_torchair_attention.py │ ├── test_torchair_mla.py │ ├── test_torchair_model_runner.py │ ├── test_torchair_mtp_proposer.py │ ├── test_torchair_sfa.py │ ├── test_torchair_worker.py │ └── test_utils.py │ └── worker │ ├── test_input_batch.py │ ├── test_model_runner_v1.py │ └── test_worker_v1.py ├── tools ├── actionlint.sh ├── aisbench.py ├── check_python_src_init.py ├── check_repo.sh ├── enforce_regex_import.py ├── mooncake_installer.sh ├── mypy.sh ├── png-lint.sh ├── send_mm_request.py ├── send_request.py ├── shellcheck.sh └── sphinx-lint.sh ├── typos.toml └── vllm_ascend ├── __init__.py ├── _cann_ops_custom └── .gitkeep ├── ascend_config.py ├── ascend_forward_context.py ├── attention ├── __init__.py ├── attention_mask.py ├── attention_v1.py ├── mla_v1.py ├── sfa_v1.py └── utils.py ├── compilation ├── __init__.py └── acl_graph.py ├── core ├── __init__.py ├── recompute_schedule_config.py ├── recompute_scheduler.py ├── schedule_config.py ├── scheduler.py └── scheduler_dynamic_batch.py ├── cpu_binding.py ├── device_allocator ├── __init__.py └── camem.py ├── distributed ├── __init__.py ├── communicator.py ├── cpu_offload_connector.py ├── cpu_offload_manager │ ├── __init__.py │ ├── cpu_kv_cache_manager.py │ └── metadata.py ├── device_communicators │ ├── __init__.py │ ├── pyhccl.py │ └── pyhccl_wrapper.py ├── kvpool │ ├── __init__.py │ ├── ascend_store_connector.py │ ├── backend │ │ ├── __init__.py │ │ ├── backend.py │ │ ├── memcache_backend.py │ │ └── mooncake_backend.py │ ├── config_data.py │ ├── kv_transfer.py │ ├── pool_scheduler.py │ └── pool_worker.py ├── llmdatadist_c_mgr_connector.py ├── mooncake_connector.py ├── mooncake_layerwise_connector.py ├── mooncake_transfer_engine.py ├── parallel_state.py └── utils.py ├── envs.py ├── eplb ├── __init__.py ├── adaptor │ ├── __init__.py │ ├── abstract_adaptor.py │ └── vllm_adaptor.py ├── core │ ├── __init__.py │ ├── eplb_device_transfer_loader.py │ ├── eplb_utils.py │ ├── eplb_worker.py │ └── policy │ │ ├── __init__.py │ │ ├── policy_abstract.py │ │ ├── policy_dynamic_ep.py │ │ ├── policy_dynamic_ep_v2.py │ │ ├── policy_factory.py │ │ ├── policy_flashlb.py │ │ └── policy_random.py ├── eplb_updator.py └── utils.py ├── kv_offload ├── __init__.py ├── cpu_npu.py └── npu.py ├── lora ├── __init__.py ├── lora_ops.py ├── punica_npu.py └── utils.py ├── meta_registration.py ├── model_loader ├── __init__.py └── netloader │ ├── __init__.py │ ├── executor │ ├── __init__.py │ └── elastic_load.py │ ├── interaction │ ├── __init__.py │ └── elastic.py │ ├── load.py │ ├── netloader.py │ └── utils.py ├── ops ├── __init__.py ├── activation.py ├── attention.py ├── expert_load_balancer.py ├── fused_moe │ ├── __init__.py │ ├── comm_utils.py │ ├── experts_selector.py │ ├── fused_moe.py │ ├── moe_comm_method.py │ ├── moe_mlp.py │ ├── prepare_finalize.py │ └── token_dispatcher.py ├── layernorm.py ├── linear.py ├── linear_op.py ├── mla.py ├── register_custom_ops.py ├── rotary_embedding.py ├── triton │ ├── __init__.py │ ├── fla │ │ ├── __init__.py │ │ ├── chunk.py │ │ ├── chunk_delta_h.py │ │ ├── chunk_o.py │ │ ├── chunk_scaled_dot_kkt.py │ │ ├── cumsum.py │ │ ├── layernorm_guard.py │ │ ├── sigmoid_gating.py │ │ ├── solve_tril.py │ │ ├── utils.py │ │ └── wy_fast.py │ ├── mamba │ │ ├── __init__.py │ │ └── casual_conv1d.py │ ├── rope.py │ └── triton_utils.py ├── vocab_parallel_embedding.py └── weight_prefetch.py ├── patch ├── __init__.py ├── platform │ ├── __init__.py │ ├── patch_config.py │ ├── patch_distributed.py │ ├── patch_ec_connector.py │ ├── patch_mamba_config.py │ ├── patch_multiproc_executor.py │ └── patch_sched_yield.py └── worker │ ├── __init__.py │ ├── patch_deepseek.py │ ├── patch_distributed.py │ ├── patch_minicpm.py │ ├── patch_multimodal_merge.py │ ├── patch_qwen2_5_omni.py │ ├── patch_qwen2_5_vl.py │ ├── patch_qwen3_vl.py │ ├── patch_roberta.py │ ├── patch_rope.py │ ├── patch_triton.py │ └── patch_weight_loader.py ├── platform.py ├── profiling_config.py ├── quantization ├── __init__.py ├── compressed_tensors │ ├── __init__.py │ └── compressed_tensors.py ├── quant_config.py ├── utils.py ├── w4a4_flatquant_dynamic.py ├── w4a8_dynamic.py ├── w8a8.py ├── w8a8_dynamic.py └── w8a8_pdmix.py ├── sample ├── __init__.py ├── logits_processor │ ├── __init__.py │ └── builtin.py ├── rejection_sampler.py └── sampler.py ├── spec_decode ├── __init__.py ├── eagle_proposer.py ├── interface.py ├── mtp_proposer.py ├── ngram_proposer.py └── suffix_proposer.py ├── torchair ├── __init__.py ├── models │ ├── __init__.py │ ├── qwen2.py │ ├── qwen3_moe.py │ ├── torchair_deepseek_mtp.py │ ├── torchair_deepseek_v2.py │ ├── torchair_deepseek_v3.py │ └── torchair_pangu_moe.py ├── ops │ ├── __init__.py │ ├── sequence_parallel.py │ ├── shared_weight_layer.py │ ├── torchair_activation.py │ ├── torchair_fused_moe.py │ ├── torchair_layernorm.py │ ├── torchair_rotary_embedding.py │ └── torchair_vocab_parallel_embedding.py ├── quantization │ ├── __init__.py │ ├── torchair_w4a8_dynamic.py │ └── torchair_w8a8_dynamic.py ├── torchair_attention.py ├── torchair_mla.py ├── torchair_model_runner.py ├── torchair_mtp_proposer.py ├── torchair_sfa.py ├── torchair_worker.py └── utils.py ├── utils.py └── worker ├── __init__.py ├── block_table.py ├── model_runner_v1.py ├── npu_input_batch.py └── worker_v1.py /.gemini/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.gemini/config.yaml -------------------------------------------------------------------------------- /.github/Dockerfile.buildwheel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/Dockerfile.buildwheel -------------------------------------------------------------------------------- /.github/Dockerfile.nightly.a2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/Dockerfile.nightly.a2 -------------------------------------------------------------------------------- /.github/Dockerfile.nightly.a3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/Dockerfile.nightly.a3 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/100-documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/100-documentation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/110-user-story.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/110-user-story.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/200-installation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/200-installation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/300-usage.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/300-usage.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/400-bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/400-bug-report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/500-feature-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/500-feature-request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/600-new-model.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/600-new-model.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/700-performance-discussion.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/700-performance-discussion.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/750-RFC.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/750-RFC.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/800-others.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/800-others.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/900-release-checklist.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/ISSUE_TEMPLATE/900-release-checklist.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/actionlint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/actionlint.yaml -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/format_pr_body.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/format_pr_body.sh -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/labeler.yml -------------------------------------------------------------------------------- /.github/workflows/_e2e_nightly_multi_node.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/_e2e_nightly_multi_node.yaml -------------------------------------------------------------------------------- /.github/workflows/_e2e_nightly_single_node.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/_e2e_nightly_single_node.yaml -------------------------------------------------------------------------------- /.github/workflows/_e2e_nightly_single_node_models.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/_e2e_nightly_single_node_models.yaml -------------------------------------------------------------------------------- /.github/workflows/_e2e_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/_e2e_test.yaml -------------------------------------------------------------------------------- /.github/workflows/_nightly_image_build.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/_nightly_image_build.yaml -------------------------------------------------------------------------------- /.github/workflows/format_pr_body.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/format_pr_body.yaml -------------------------------------------------------------------------------- /.github/workflows/image_310p_openeuler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/image_310p_openeuler.yml -------------------------------------------------------------------------------- /.github/workflows/image_310p_ubuntu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/image_310p_ubuntu.yml -------------------------------------------------------------------------------- /.github/workflows/image_a3_openeuler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/image_a3_openeuler.yml -------------------------------------------------------------------------------- /.github/workflows/image_a3_ubuntu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/image_a3_ubuntu.yml -------------------------------------------------------------------------------- /.github/workflows/image_openeuler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/image_openeuler.yml -------------------------------------------------------------------------------- /.github/workflows/image_ubuntu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/image_ubuntu.yml -------------------------------------------------------------------------------- /.github/workflows/label_merge_conflict.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/label_merge_conflict.yml -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/labeler.yml -------------------------------------------------------------------------------- /.github/workflows/matchers/actionlint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/matchers/actionlint.json -------------------------------------------------------------------------------- /.github/workflows/matchers/mypy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/matchers/mypy.json -------------------------------------------------------------------------------- /.github/workflows/matchers/ruff.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/matchers/ruff.json -------------------------------------------------------------------------------- /.github/workflows/nightly_benchmarks.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/nightly_benchmarks.yaml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.github/workflows/release_code.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/release_code.yml -------------------------------------------------------------------------------- /.github/workflows/release_whl.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/release_whl.yml -------------------------------------------------------------------------------- /.github/workflows/reminder_comment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/reminder_comment.yml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_doctest.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_doctest.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_310p.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_310p.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_full_vllm_main.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_full_vllm_main.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_nightly_a2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_nightly_a2.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_nightly_a3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_nightly_a3.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_pr_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_pr_full.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_pr_light.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_pr_light.yaml -------------------------------------------------------------------------------- /.github/workflows/vllm_ascend_test_report.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.github/workflows/vllm_ascend_test_report.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /DCO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/DCO -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/Dockerfile -------------------------------------------------------------------------------- /Dockerfile.310p: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/Dockerfile.310p -------------------------------------------------------------------------------- /Dockerfile.310p.openEuler: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/Dockerfile.310p.openEuler -------------------------------------------------------------------------------- /Dockerfile.a3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/Dockerfile.a3 -------------------------------------------------------------------------------- /Dockerfile.a3.openEuler: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/Dockerfile.a3.openEuler -------------------------------------------------------------------------------- /Dockerfile.openEuler: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/Dockerfile.openEuler -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/README.md -------------------------------------------------------------------------------- /README.zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/README.zh.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/ops/ben_vocabparallelembedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/ops/ben_vocabparallelembedding.py -------------------------------------------------------------------------------- /benchmarks/requirements-bench.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | datasets 3 | modelscope 4 | tabulate -------------------------------------------------------------------------------- /benchmarks/scripts/convert_json_to_markdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/scripts/convert_json_to_markdown.py -------------------------------------------------------------------------------- /benchmarks/scripts/perf_result_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/scripts/perf_result_template.md -------------------------------------------------------------------------------- /benchmarks/scripts/run-performance-benchmarks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/scripts/run-performance-benchmarks.sh -------------------------------------------------------------------------------- /benchmarks/tests/latency-tests.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/tests/latency-tests.json -------------------------------------------------------------------------------- /benchmarks/tests/serving-tests.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/tests/serving-tests.json -------------------------------------------------------------------------------- /benchmarks/tests/throughput-tests.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/benchmarks/tests/throughput-tests.json -------------------------------------------------------------------------------- /cmake/utils.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/cmake/utils.cmake -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/codecov.yml -------------------------------------------------------------------------------- /collect_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/collect_env.py -------------------------------------------------------------------------------- /csrc/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/aclnn_torch_adapter/NPUBridge.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/aclnn_torch_adapter/NPUBridge.cpp -------------------------------------------------------------------------------- /csrc/aclnn_torch_adapter/NPUBridge.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/aclnn_torch_adapter/NPUBridge.h -------------------------------------------------------------------------------- /csrc/aclnn_torch_adapter/NPUStorageImpl.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/aclnn_torch_adapter/NPUStorageImpl.cpp -------------------------------------------------------------------------------- /csrc/aclnn_torch_adapter/NPUStorageImpl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/aclnn_torch_adapter/NPUStorageImpl.h -------------------------------------------------------------------------------- /csrc/aclnn_torch_adapter/op_api_common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/aclnn_torch_adapter/op_api_common.h -------------------------------------------------------------------------------- /csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h -------------------------------------------------------------------------------- /csrc/batch_matmul_transpose/op_host/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/batch_matmul_transpose/op_host/common.h -------------------------------------------------------------------------------- /csrc/batch_matmul_transpose/op_host/common_tiling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/batch_matmul_transpose/op_host/common_tiling.h -------------------------------------------------------------------------------- /csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp -------------------------------------------------------------------------------- /csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h -------------------------------------------------------------------------------- /csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp -------------------------------------------------------------------------------- /csrc/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/build.sh -------------------------------------------------------------------------------- /csrc/build_aclnn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/build_aclnn.sh -------------------------------------------------------------------------------- /csrc/camem_allocator.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/camem_allocator.cpp -------------------------------------------------------------------------------- /csrc/cmake/config.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/cmake/config.cmake -------------------------------------------------------------------------------- /csrc/cmake/func.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/cmake/func.cmake -------------------------------------------------------------------------------- /csrc/cmake/intf.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/cmake/intf.cmake -------------------------------------------------------------------------------- /csrc/cmake/intf_pub.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/cmake/intf_pub.cmake -------------------------------------------------------------------------------- /csrc/cmake/modules/Findalog.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/cmake/modules/Findalog.cmake -------------------------------------------------------------------------------- /csrc/cmake/scripts/prepare.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/cmake/scripts/prepare.sh -------------------------------------------------------------------------------- /csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp -------------------------------------------------------------------------------- /csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h -------------------------------------------------------------------------------- /csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp -------------------------------------------------------------------------------- /csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h -------------------------------------------------------------------------------- /csrc/kernels/bgmv_expand.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/bgmv_expand.cpp -------------------------------------------------------------------------------- /csrc/kernels/bgmv_shrink.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/bgmv_shrink.cpp -------------------------------------------------------------------------------- /csrc/kernels/get_masked_input_and_mask_kernel.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/get_masked_input_and_mask_kernel.cpp -------------------------------------------------------------------------------- /csrc/kernels/math_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/math_utils.h -------------------------------------------------------------------------------- /csrc/kernels/pos_encoding_kernels.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/pos_encoding_kernels.cpp -------------------------------------------------------------------------------- /csrc/kernels/sgmv_expand.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/sgmv_expand.cpp -------------------------------------------------------------------------------- /csrc/kernels/sgmv_shrink.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/sgmv_shrink.cpp -------------------------------------------------------------------------------- /csrc/kernels/types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/types.h -------------------------------------------------------------------------------- /csrc/kernels/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/kernels/utils.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_host/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_host/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_host/lightning_indexer_def.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_host/lightning_indexer_def.cpp -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_host/lightning_indexer_tiling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_host/lightning_indexer_tiling.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer.cpp -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer_common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer_common.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h -------------------------------------------------------------------------------- /csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_host/mla_preprocess.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_host/mla_preprocess.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/common.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/common_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/common_func.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/hardware.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/hardware.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterator.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/layout.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/layout.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/mem.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/mem.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/mma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/mma.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/set_fpc.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/simd.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/simd.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/kernel/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/kernel/utils.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/mla_preprocess.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/mla_preprocess.h -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp -------------------------------------------------------------------------------- /csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp -------------------------------------------------------------------------------- /csrc/ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/ops.h -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_host/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_host/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h -------------------------------------------------------------------------------- /csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h -------------------------------------------------------------------------------- /csrc/torch_binding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/torch_binding.cpp -------------------------------------------------------------------------------- /csrc/torch_binding_meta.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/torch_binding_meta.cpp -------------------------------------------------------------------------------- /csrc/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils.h -------------------------------------------------------------------------------- /csrc/utils/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/CMakeLists.txt -------------------------------------------------------------------------------- /csrc/utils/inc/aclnn_util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/aclnn_util.h -------------------------------------------------------------------------------- /csrc/utils/inc/error/ops_error.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/error/ops_error.h -------------------------------------------------------------------------------- /csrc/utils/inc/fallback.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/fallback.h -------------------------------------------------------------------------------- /csrc/utils/inc/fallback_comm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/fallback_comm.h -------------------------------------------------------------------------------- /csrc/utils/inc/kernel/dropmask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/kernel/dropmask.h -------------------------------------------------------------------------------- /csrc/utils/inc/kernel/pse.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/kernel/pse.h -------------------------------------------------------------------------------- /csrc/utils/inc/kernel/util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/kernel/util.h -------------------------------------------------------------------------------- /csrc/utils/inc/log/inner/dfx_base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/log/inner/dfx_base.h -------------------------------------------------------------------------------- /csrc/utils/inc/log/ops_log.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/log/ops_log.h -------------------------------------------------------------------------------- /csrc/utils/inc/tiling/data_copy_transpose_tiling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/tiling/data_copy_transpose_tiling.h -------------------------------------------------------------------------------- /csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h -------------------------------------------------------------------------------- /csrc/utils/inc/tiling/tiling_base.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/tiling/tiling_base.h -------------------------------------------------------------------------------- /csrc/utils/inc/tiling/tiling_templates_registry.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/tiling/tiling_templates_registry.h -------------------------------------------------------------------------------- /csrc/utils/inc/tiling/tiling_type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/inc/tiling/tiling_type.h -------------------------------------------------------------------------------- /csrc/utils/src/fallback_comm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/csrc/utils/src/fallback_comm.cpp -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/requirements-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/requirements-test.txt -------------------------------------------------------------------------------- /docs/source/_templates/sections/header.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/_templates/sections/header.html -------------------------------------------------------------------------------- /docs/source/assets/deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/deployment.png -------------------------------------------------------------------------------- /docs/source/assets/disaggregated_prefill_pull.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/disaggregated_prefill_pull.png -------------------------------------------------------------------------------- /docs/source/assets/disaggregated_prefill_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/disaggregated_prefill_push.png -------------------------------------------------------------------------------- /docs/source/assets/eplb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/eplb.png -------------------------------------------------------------------------------- /docs/source/assets/multi_node_dp_deepseek.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/multi_node_dp_deepseek.png -------------------------------------------------------------------------------- /docs/source/assets/multi_node_dp_kimi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/multi_node_dp_kimi.png -------------------------------------------------------------------------------- /docs/source/assets/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/assets/workflow.png -------------------------------------------------------------------------------- /docs/source/community/contributors.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/community/contributors.md -------------------------------------------------------------------------------- /docs/source/community/governance.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/community/governance.md -------------------------------------------------------------------------------- /docs/source/community/user_stories/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/community/user_stories/index.md -------------------------------------------------------------------------------- /docs/source/community/user_stories/llamafactory.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/community/user_stories/llamafactory.md -------------------------------------------------------------------------------- /docs/source/community/versioning_policy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/community/versioning_policy.md -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/developer_guide/contribution/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/contribution/index.md -------------------------------------------------------------------------------- /docs/source/developer_guide/contribution/multi_node_test.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/contribution/multi_node_test.md -------------------------------------------------------------------------------- /docs/source/developer_guide/contribution/testing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/contribution/testing.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/accuracy_report/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/accuracy_report/index.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/index.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/using_ais_bench.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/using_ais_bench.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/using_evalscope.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/using_evalscope.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/using_lm_eval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/using_lm_eval.md -------------------------------------------------------------------------------- /docs/source/developer_guide/evaluation/using_opencompass.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/evaluation/using_opencompass.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/ACL_Graph.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/ACL_Graph.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/Multi_Token_Prediction.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/Multi_Token_Prediction.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/disaggregated_prefill.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/disaggregated_prefill.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/eplb_swift_balancer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/index.md -------------------------------------------------------------------------------- /docs/source/developer_guide/feature_guide/patch.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/feature_guide/patch.md -------------------------------------------------------------------------------- /docs/source/developer_guide/performance_and_debug/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/performance_and_debug/index.md -------------------------------------------------------------------------------- /docs/source/developer_guide/performance_and_debug/msprobe_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/performance_and_debug/msprobe_guide.md -------------------------------------------------------------------------------- /docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md -------------------------------------------------------------------------------- /docs/source/developer_guide/performance_and_debug/performance_benchmark.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/performance_and_debug/performance_benchmark.md -------------------------------------------------------------------------------- /docs/source/developer_guide/performance_and_debug/profile_execute_duration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md -------------------------------------------------------------------------------- /docs/source/developer_guide/performance_and_debug/service_profiling_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md -------------------------------------------------------------------------------- /docs/source/faqs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/faqs.md -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/index.md -------------------------------------------------------------------------------- /docs/source/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/installation.md -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/faqs.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/installation.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/installation.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po -------------------------------------------------------------------------------- /docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po -------------------------------------------------------------------------------- /docs/source/logos/vllm-ascend-logo-text-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/logos/vllm-ascend-logo-text-dark.png -------------------------------------------------------------------------------- /docs/source/logos/vllm-ascend-logo-text-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/logos/vllm-ascend-logo-text-light.png -------------------------------------------------------------------------------- /docs/source/quick_start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/quick_start.md -------------------------------------------------------------------------------- /docs/source/tutorials/DeepSeek-V3.1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/DeepSeek-V3.1.md -------------------------------------------------------------------------------- /docs/source/tutorials/DeepSeek-V3.2-Exp.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/DeepSeek-V3.2-Exp.md -------------------------------------------------------------------------------- /docs/source/tutorials/Qwen2.5-Omni.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/Qwen2.5-Omni.md -------------------------------------------------------------------------------- /docs/source/tutorials/Qwen3-Coder-30B-A3B.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/Qwen3-Coder-30B-A3B.md -------------------------------------------------------------------------------- /docs/source/tutorials/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/index.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_node.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_node.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_node_kimi.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_node_kimi.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_node_pd_disaggregation_mooncake.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_node_qwen3vl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_node_qwen3vl.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_node_ray.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_node_ray.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_npu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_npu.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_npu_moge.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_npu_moge.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_npu_quantization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_npu_quantization.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_npu_qwen3_moe.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_npu_qwen3_moe.md -------------------------------------------------------------------------------- /docs/source/tutorials/multi_npu_qwen3_next.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/multi_npu_qwen3_next.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_node_300i.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_node_300i.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_node_pd_disaggregation_llmdatadist.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_node_pd_disaggregation_llmdatadist.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_npu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_npu.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_npu_qwen2.5_vl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_npu_qwen2.5_vl.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_npu_qwen2_audio.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_npu_qwen2_audio.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_npu_qwen3_embedding.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_npu_qwen3_embedding.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_npu_qwen3_quantization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_npu_qwen3_quantization.md -------------------------------------------------------------------------------- /docs/source/tutorials/single_npu_qwen3_w4a4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/tutorials/single_npu_qwen3_w4a4.md -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/additional_config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/configuration/additional_config.md -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/env_vars.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/configuration/env_vars.md -------------------------------------------------------------------------------- /docs/source/user_guide/configuration/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/configuration/index.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/dynamic_batch.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/dynamic_batch.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/eplb_swift_balancer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/eplb_swift_balancer.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/external_dp.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/external_dp.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/graph_mode.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/graph_mode.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/images/eplb_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/images/eplb_img.png -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/images/netloader_flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/images/netloader_flowchart.png -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/images/structured_output_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/images/structured_output_1.png -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/index.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/kv_pool.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/kv_pool.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/lora.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/lora.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/netloader.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/netloader.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/quantization-llm-compressor.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/quantization-llm-compressor.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/quantization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/quantization.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/sleep_mode.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/sleep_mode.md -------------------------------------------------------------------------------- /docs/source/user_guide/feature_guide/structured_output.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/feature_guide/structured_output.md -------------------------------------------------------------------------------- /docs/source/user_guide/release_notes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/release_notes.md -------------------------------------------------------------------------------- /docs/source/user_guide/support_matrix/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/support_matrix/index.md -------------------------------------------------------------------------------- /docs/source/user_guide/support_matrix/supported_features.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/support_matrix/supported_features.md -------------------------------------------------------------------------------- /docs/source/user_guide/support_matrix/supported_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/docs/source/user_guide/support_matrix/supported_models.md -------------------------------------------------------------------------------- /examples/chat_templates/template_qwen2_audio.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/chat_templates/template_qwen2_audio.jinja -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/README.md -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/gen_ranktable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/gen_ranktable.py -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/gen_ranktable.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/gen_ranktable.sh -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md -------------------------------------------------------------------------------- /examples/disaggregated_prefill_v1/run_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/disaggregated_prefill_v1/run_server.sh -------------------------------------------------------------------------------- /examples/eplb/eplb_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/eplb/eplb_deepseek.py -------------------------------------------------------------------------------- /examples/eplb/eplb_strategy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/eplb/eplb_strategy.py -------------------------------------------------------------------------------- /examples/external_online_dp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/external_online_dp/README.md -------------------------------------------------------------------------------- /examples/external_online_dp/dp_load_balance_proxy_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/external_online_dp/dp_load_balance_proxy_server.py -------------------------------------------------------------------------------- /examples/external_online_dp/launch_online_dp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/external_online_dp/launch_online_dp.py -------------------------------------------------------------------------------- /examples/external_online_dp/run_dp_template.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/external_online_dp/run_dp_template.sh -------------------------------------------------------------------------------- /examples/offline_data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_data_parallel.py -------------------------------------------------------------------------------- /examples/offline_disaggregated_prefill_npu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_disaggregated_prefill_npu.py -------------------------------------------------------------------------------- /examples/offline_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_embed.py -------------------------------------------------------------------------------- /examples/offline_external_launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_external_launcher.py -------------------------------------------------------------------------------- /examples/offline_inference_audio_language.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_inference_audio_language.py -------------------------------------------------------------------------------- /examples/offline_inference_npu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_inference_npu.py -------------------------------------------------------------------------------- /examples/offline_inference_npu_long_seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_inference_npu_long_seq.py -------------------------------------------------------------------------------- /examples/offline_inference_npu_tp2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_inference_npu_tp2.py -------------------------------------------------------------------------------- /examples/offline_inference_sleep_mode_npu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_inference_sleep_mode_npu.py -------------------------------------------------------------------------------- /examples/offline_weight_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/offline_weight_load.py -------------------------------------------------------------------------------- /examples/prompt_embed_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/prompt_embed_inference.py -------------------------------------------------------------------------------- /examples/prompt_embedding_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/prompt_embedding_inference.py -------------------------------------------------------------------------------- /examples/quantization/llm-compressor/w8a8_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/quantization/llm-compressor/w8a8_int8.py -------------------------------------------------------------------------------- /examples/quantization/llm-compressor/w8a8_int8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/quantization/llm-compressor/w8a8_int8_dynamic.py -------------------------------------------------------------------------------- /examples/run_dp_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/examples/run_dp_server.sh -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/format.sh -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/mypy.ini -------------------------------------------------------------------------------- /packages.txt: -------------------------------------------------------------------------------- 1 | git 2 | vim 3 | wget 4 | jq 5 | curl 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-lint.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/requirements-lint.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/310p/test_offline_inference_310p.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/310p/test_offline_inference_310p.py -------------------------------------------------------------------------------- /tests/e2e/310p/test_offline_inference_parallel_310p.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/310p/test_offline_inference_parallel_310p.py -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/common.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/common.sh -------------------------------------------------------------------------------- /tests/e2e/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/conftest.py -------------------------------------------------------------------------------- /tests/e2e/doctests/001-quickstart-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/doctests/001-quickstart-test.sh -------------------------------------------------------------------------------- /tests/e2e/doctests/002-pip-binary-installation-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/doctests/002-pip-binary-installation-test.sh -------------------------------------------------------------------------------- /tests/e2e/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/model_utils.py -------------------------------------------------------------------------------- /tests/e2e/models/configs/DeepSeek-V2-Lite.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/DeepSeek-V2-Lite.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/InternVL3_5-8B-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Meta-Llama-3.1-8B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Meta-Llama-3.1-8B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Molmo-7B-D-0924.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Molmo-7B-D-0924.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen2-VL-7B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen2.5-VL-7B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-30B-A3B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-30B-A3B.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-8B-Base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-8B-Base.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-8B-W8A8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-8B-W8A8.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-8B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-8B.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/accuracy.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/accuracy.txt -------------------------------------------------------------------------------- /tests/e2e/models/configs/gemma-2-9b-it.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/gemma-2-9b-it.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/gemma-3-4b-it.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/gemma-3-4b-it.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/internlm-7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/internlm-7b.yaml -------------------------------------------------------------------------------- /tests/e2e/models/configs/llava-1.5-7b-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/configs/llava-1.5-7b-hf.yaml -------------------------------------------------------------------------------- /tests/e2e/models/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/conftest.py -------------------------------------------------------------------------------- /tests/e2e/models/report_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/report_template.md -------------------------------------------------------------------------------- /tests/e2e/models/test_lm_eval_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/models/test_lm_eval_correctness.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_aclgraph_capture_replay.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_aclgraph_capture_replay.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_chunk_gated_delta_rule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_chunk_gated_delta_rule.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_data_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_data_parallel.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_data_parallel_tp2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_data_parallel_tp2.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_expert_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_expert_parallel.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_external_launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_external_launcher.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_full_graph_mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_full_graph_mode.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_fused_moe_allgather_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_fused_moe_allgather_ep.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_ilama_lora_tp2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_ilama_lora_tp2.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_offline_inference_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_offline_inference_distributed.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_pipeline_parallel.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_prefix_caching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_prefix_caching.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_quantization.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_qwen3_moe.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_qwen3_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_qwen3_next.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_shared_expert_dp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_shared_expert_dp.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_single_request_aclgraph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_single_request_aclgraph.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_torchair_graph_mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_torchair_graph_mode.py -------------------------------------------------------------------------------- /tests/e2e/multicard/test_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/multicard/test_weight_loader.py -------------------------------------------------------------------------------- /tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_deepseek_v3_2_exp_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_deepseek_v3_2_exp_w8a8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_glm4_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_glm4_5.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen2_5_vl_32b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen2_5_vl_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen3_235b_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen3_235b_w8a8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen3_30b_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen3_30b_w8a8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen3_32b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen3_32b.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwen3_32b_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwen3_32b_int8.py -------------------------------------------------------------------------------- /tests/e2e/nightly/models/test_qwq_32b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/models/test_qwq_32b.py -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/DeepSeek-V3_2-Exp-bf16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3_2-Exp-bf16.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8-EPLB.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8-EPLB.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/multi_node_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/multi_node_config.py -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/config/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/config/utils.py -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/scripts/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/scripts/run.sh -------------------------------------------------------------------------------- /tests/e2e/nightly/multi_node/test_multi_node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/multi_node/test_multi_node.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_batch_matmul_transpose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_batch_matmul_transpose.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_bgmv_expand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_bgmv_expand.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_bgmv_shrink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_bgmv_shrink.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_fused_moe.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_gating_top_k_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_gating_top_k_softmax.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_grouped_matmul_swiglu_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_grouped_matmul_swiglu_quant.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_mla_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_mla_preprocess.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_rotary_embedding.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/test_vocabparallelembedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/test_vocabparallelembedding.py -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/triton/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/nightly/ops/triton/test_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/nightly/ops/triton/test_rope.py -------------------------------------------------------------------------------- /tests/e2e/pd_disaggreate/run_edge_case_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/pd_disaggreate/run_edge_case_test.sh -------------------------------------------------------------------------------- /tests/e2e/pd_disaggreate/setup_pd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/pd_disaggreate/setup_pd.sh -------------------------------------------------------------------------------- /tests/e2e/pd_disaggreate/test_edge_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/pd_disaggreate/test_edge_cases.py -------------------------------------------------------------------------------- /tests/e2e/pd_disaggreate/test_pd_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/pd_disaggreate/test_pd_e2e.py -------------------------------------------------------------------------------- /tests/e2e/prompts/example.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/prompts/example.txt -------------------------------------------------------------------------------- /tests/e2e/run_disagg_pd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/run_disagg_pd.sh -------------------------------------------------------------------------------- /tests/e2e/run_doctests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/run_doctests.sh -------------------------------------------------------------------------------- /tests/e2e/singlecard/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/singlecard/multi-modal/test_internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/multi-modal/test_internvl.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_aclgraph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_aclgraph.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_aclgraph_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_aclgraph_mem.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_bge_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_bge_model.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_camem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_camem.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_completion_with_prompt_embeds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_completion_with_prompt_embeds.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_embedding.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_embedding_aclgraph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_embedding_aclgraph.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_guided_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_guided_decoding.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_ilama_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_ilama_lora.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_multistream_overlap_shared_expert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_profile_execute_duration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_profile_execute_duration.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_quantization.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_sampler.py -------------------------------------------------------------------------------- /tests/e2e/singlecard/test_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/singlecard/test_vlm.py -------------------------------------------------------------------------------- /tests/e2e/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/utils.py -------------------------------------------------------------------------------- /tests/e2e/vllm_interface/singlecard/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/vllm_interface/singlecard/test_sampler.py -------------------------------------------------------------------------------- /tests/e2e/vllm_interface/vllm_test.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/e2e/vllm_interface/vllm_test.cfg -------------------------------------------------------------------------------- /tests/ut/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/ut/attention/test_attention_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/attention/test_attention_mask.py -------------------------------------------------------------------------------- /tests/ut/attention/test_attention_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/attention/test_attention_v1.py -------------------------------------------------------------------------------- /tests/ut/attention/test_mla_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/attention/test_mla_v1.py -------------------------------------------------------------------------------- /tests/ut/attention/test_sfa_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/attention/test_sfa_v1.py -------------------------------------------------------------------------------- /tests/ut/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/base.py -------------------------------------------------------------------------------- /tests/ut/compilation/test_acl_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/compilation/test_acl_graph.py -------------------------------------------------------------------------------- /tests/ut/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/conftest.py -------------------------------------------------------------------------------- /tests/ut/core/test_schedule_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/core/test_schedule_config.py -------------------------------------------------------------------------------- /tests/ut/core/test_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/core/test_scheduler.py -------------------------------------------------------------------------------- /tests/ut/device_allocator/test_camem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/device_allocator/test_camem.py -------------------------------------------------------------------------------- /tests/ut/distributed/device_communicators/test_pyhccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/distributed/device_communicators/test_pyhccl.py -------------------------------------------------------------------------------- /tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py -------------------------------------------------------------------------------- /tests/ut/distributed/mooncake/test_config_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/distributed/mooncake/test_config_data.py -------------------------------------------------------------------------------- /tests/ut/distributed/test_communicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/distributed/test_communicator.py -------------------------------------------------------------------------------- /tests/ut/distributed/test_determin_expert_map_all.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/ut/distributed/test_parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/distributed/test_parallel_state.py -------------------------------------------------------------------------------- /tests/ut/eplb/adaptor/test_abstract_adaptor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/adaptor/test_abstract_adaptor.py -------------------------------------------------------------------------------- /tests/ut/eplb/core/policy/test_policy_abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/core/policy/test_policy_abstract.py -------------------------------------------------------------------------------- /tests/ut/eplb/core/policy/test_policy_dynamic_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py -------------------------------------------------------------------------------- /tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py -------------------------------------------------------------------------------- /tests/ut/eplb/core/policy/test_policy_factor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/core/policy/test_policy_factor.py -------------------------------------------------------------------------------- /tests/ut/eplb/core/test_eplb_device_transfer_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/core/test_eplb_device_transfer_loader.py -------------------------------------------------------------------------------- /tests/ut/eplb/core/test_eplb_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/eplb/core/test_eplb_utils.py -------------------------------------------------------------------------------- /tests/ut/fake_weight/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/fake_weight/config.json -------------------------------------------------------------------------------- /tests/ut/kv_connector/test_llmdatadist_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/kv_connector/test_llmdatadist_connector.py -------------------------------------------------------------------------------- /tests/ut/kv_connector/test_mooncake_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/kv_connector/test_mooncake_connector.py -------------------------------------------------------------------------------- /tests/ut/kv_connector/test_mooncake_layerwise_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/kv_connector/test_mooncake_layerwise_connector.py -------------------------------------------------------------------------------- /tests/ut/kv_connector/test_remote_decode_lifecycle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/kv_connector/test_remote_decode_lifecycle.py -------------------------------------------------------------------------------- /tests/ut/kv_connector/test_remote_prefill_lifecycle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/kv_connector/test_remote_prefill_lifecycle.py -------------------------------------------------------------------------------- /tests/ut/kv_connector/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/kv_connector/utils.py -------------------------------------------------------------------------------- /tests/ut/model_loader/netloader/test_netloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/model_loader/netloader/test_netloader.py -------------------------------------------------------------------------------- /tests/ut/model_loader/netloader/test_netloader_elastic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/model_loader/netloader/test_netloader_elastic.py -------------------------------------------------------------------------------- /tests/ut/model_loader/netloader/test_netloader_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/model_loader/netloader/test_netloader_load.py -------------------------------------------------------------------------------- /tests/ut/model_loader/netloader/test_netloader_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/model_loader/netloader/test_netloader_utils.py -------------------------------------------------------------------------------- /tests/ut/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/ut/models/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/models/conftest.py -------------------------------------------------------------------------------- /tests/ut/models/test_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/models/test_mla.py -------------------------------------------------------------------------------- /tests/ut/ops/expert_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/expert_map.json -------------------------------------------------------------------------------- /tests/ut/ops/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_activation.py -------------------------------------------------------------------------------- /tests/ut/ops/test_comm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_comm_utils.py -------------------------------------------------------------------------------- /tests/ut/ops/test_expert_load_balancer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_expert_load_balancer.py -------------------------------------------------------------------------------- /tests/ut/ops/test_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_fused_moe.py -------------------------------------------------------------------------------- /tests/ut/ops/test_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_layernorm.py -------------------------------------------------------------------------------- /tests/ut/ops/test_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_linear.py -------------------------------------------------------------------------------- /tests/ut/ops/test_moe_comm_method.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_moe_comm_method.py -------------------------------------------------------------------------------- /tests/ut/ops/test_prepare_finalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_prepare_finalize.py -------------------------------------------------------------------------------- /tests/ut/ops/test_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_rotary_embedding.py -------------------------------------------------------------------------------- /tests/ut/ops/test_token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_token_dispatcher.py -------------------------------------------------------------------------------- /tests/ut/ops/test_vocab_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/ops/test_vocab_parallel_embedding.py -------------------------------------------------------------------------------- /tests/ut/patch/worker/patch_common/test_patch_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/patch/worker/patch_common/test_patch_distributed.py -------------------------------------------------------------------------------- /tests/ut/patch/worker/patch_common/test_patch_minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/patch/worker/patch_common/test_patch_minicpm.py -------------------------------------------------------------------------------- /tests/ut/quantization/test_quant_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/quantization/test_quant_config.py -------------------------------------------------------------------------------- /tests/ut/quantization/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/quantization/test_utils.py -------------------------------------------------------------------------------- /tests/ut/quantization/test_w4a4_flatquant_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/quantization/test_w4a4_flatquant_dynamic.py -------------------------------------------------------------------------------- /tests/ut/quantization/test_w4a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/quantization/test_w4a8_dynamic.py -------------------------------------------------------------------------------- /tests/ut/quantization/test_w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/quantization/test_w8a8.py -------------------------------------------------------------------------------- /tests/ut/quantization/test_w8a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/quantization/test_w8a8_dynamic.py -------------------------------------------------------------------------------- /tests/ut/sample/logits_processor/test_builtin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/sample/logits_processor/test_builtin.py -------------------------------------------------------------------------------- /tests/ut/sample/test_rejection_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/sample/test_rejection_sampler.py -------------------------------------------------------------------------------- /tests/ut/sample/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/sample/test_sampler.py -------------------------------------------------------------------------------- /tests/ut/spec_decode/test_eagle_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/spec_decode/test_eagle_proposer.py -------------------------------------------------------------------------------- /tests/ut/test_ascend_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/test_ascend_config.py -------------------------------------------------------------------------------- /tests/ut/test_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/test_envs.py -------------------------------------------------------------------------------- /tests/ut/test_platform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/test_platform.py -------------------------------------------------------------------------------- /tests/ut/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/test_utils.py -------------------------------------------------------------------------------- /tests/ut/torchair/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/ut/torchair/models/test_qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/models/test_qwen3_moe.py -------------------------------------------------------------------------------- /tests/ut/torchair/models/test_torchair_deepseek_mtp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/models/test_torchair_deepseek_mtp.py -------------------------------------------------------------------------------- /tests/ut/torchair/models/test_torchair_deepseek_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/models/test_torchair_deepseek_v2.py -------------------------------------------------------------------------------- /tests/ut/torchair/ops/test_torchair_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/ops/test_torchair_fused_moe.py -------------------------------------------------------------------------------- /tests/ut/torchair/ops/test_torchair_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/ops/test_torchair_rotary_embedding.py -------------------------------------------------------------------------------- /tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py -------------------------------------------------------------------------------- /tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/quantization/test_torchair_w8a8_dynamic.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_torchair_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_torchair_attention.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_torchair_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_torchair_mla.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_torchair_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_torchair_model_runner.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_torchair_mtp_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_torchair_mtp_proposer.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_torchair_sfa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_torchair_sfa.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_torchair_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_torchair_worker.py -------------------------------------------------------------------------------- /tests/ut/torchair/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/torchair/test_utils.py -------------------------------------------------------------------------------- /tests/ut/worker/test_input_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/worker/test_input_batch.py -------------------------------------------------------------------------------- /tests/ut/worker/test_model_runner_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/worker/test_model_runner_v1.py -------------------------------------------------------------------------------- /tests/ut/worker/test_worker_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tests/ut/worker/test_worker_v1.py -------------------------------------------------------------------------------- /tools/actionlint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/actionlint.sh -------------------------------------------------------------------------------- /tools/aisbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/aisbench.py -------------------------------------------------------------------------------- /tools/check_python_src_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/check_python_src_init.py -------------------------------------------------------------------------------- /tools/check_repo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/check_repo.sh -------------------------------------------------------------------------------- /tools/enforce_regex_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/enforce_regex_import.py -------------------------------------------------------------------------------- /tools/mooncake_installer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/mooncake_installer.sh -------------------------------------------------------------------------------- /tools/mypy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/mypy.sh -------------------------------------------------------------------------------- /tools/png-lint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/png-lint.sh -------------------------------------------------------------------------------- /tools/send_mm_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/send_mm_request.py -------------------------------------------------------------------------------- /tools/send_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/send_request.py -------------------------------------------------------------------------------- /tools/shellcheck.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/shellcheck.sh -------------------------------------------------------------------------------- /tools/sphinx-lint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/tools/sphinx-lint.sh -------------------------------------------------------------------------------- /typos.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/typos.toml -------------------------------------------------------------------------------- /vllm_ascend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/_cann_ops_custom/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/_cann_ops_custom/.gitkeep -------------------------------------------------------------------------------- /vllm_ascend/ascend_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ascend_config.py -------------------------------------------------------------------------------- /vllm_ascend/ascend_forward_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ascend_forward_context.py -------------------------------------------------------------------------------- /vllm_ascend/attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/attention/attention_mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/attention/attention_mask.py -------------------------------------------------------------------------------- /vllm_ascend/attention/attention_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/attention/attention_v1.py -------------------------------------------------------------------------------- /vllm_ascend/attention/mla_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/attention/mla_v1.py -------------------------------------------------------------------------------- /vllm_ascend/attention/sfa_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/attention/sfa_v1.py -------------------------------------------------------------------------------- /vllm_ascend/attention/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/attention/utils.py -------------------------------------------------------------------------------- /vllm_ascend/compilation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/compilation/acl_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/compilation/acl_graph.py -------------------------------------------------------------------------------- /vllm_ascend/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/core/recompute_schedule_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/core/recompute_schedule_config.py -------------------------------------------------------------------------------- /vllm_ascend/core/recompute_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/core/recompute_scheduler.py -------------------------------------------------------------------------------- /vllm_ascend/core/schedule_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/core/schedule_config.py -------------------------------------------------------------------------------- /vllm_ascend/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/core/scheduler.py -------------------------------------------------------------------------------- /vllm_ascend/core/scheduler_dynamic_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/core/scheduler_dynamic_batch.py -------------------------------------------------------------------------------- /vllm_ascend/cpu_binding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/cpu_binding.py -------------------------------------------------------------------------------- /vllm_ascend/device_allocator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/device_allocator/camem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/device_allocator/camem.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/communicator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/communicator.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/cpu_offload_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/cpu_offload_connector.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/cpu_offload_manager/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/cpu_offload_manager/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/cpu_offload_manager/metadata.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/device_communicators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/distributed/device_communicators/pyhccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/device_communicators/pyhccl.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/ascend_store_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/ascend_store_connector.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/backend/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/backend/backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/backend/backend.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/backend/memcache_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/backend/memcache_backend.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/backend/mooncake_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/config_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/config_data.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/kv_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/kv_transfer.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/pool_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/pool_scheduler.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/kvpool/pool_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/kvpool/pool_worker.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/llmdatadist_c_mgr_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/llmdatadist_c_mgr_connector.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/mooncake_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/mooncake_connector.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/mooncake_layerwise_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/mooncake_layerwise_connector.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/mooncake_transfer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/mooncake_transfer_engine.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/parallel_state.py -------------------------------------------------------------------------------- /vllm_ascend/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/distributed/utils.py -------------------------------------------------------------------------------- /vllm_ascend/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/envs.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/eplb/adaptor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/eplb/adaptor/abstract_adaptor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/adaptor/abstract_adaptor.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/adaptor/vllm_adaptor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/adaptor/vllm_adaptor.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/eplb_device_transfer_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/eplb_device_transfer_loader.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/eplb_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/eplb_utils.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/eplb_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/eplb_worker.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/policy_abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/policy/policy_abstract.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/policy_dynamic_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/policy_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/policy/policy_factory.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/policy_flashlb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/policy/policy_flashlb.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/core/policy/policy_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/core/policy/policy_random.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/eplb_updator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/eplb_updator.py -------------------------------------------------------------------------------- /vllm_ascend/eplb/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/eplb/utils.py -------------------------------------------------------------------------------- /vllm_ascend/kv_offload/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/kv_offload/cpu_npu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/kv_offload/cpu_npu.py -------------------------------------------------------------------------------- /vllm_ascend/kv_offload/npu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/kv_offload/npu.py -------------------------------------------------------------------------------- /vllm_ascend/lora/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/lora/lora_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/lora/lora_ops.py -------------------------------------------------------------------------------- /vllm_ascend/lora/punica_npu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/lora/punica_npu.py -------------------------------------------------------------------------------- /vllm_ascend/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/lora/utils.py -------------------------------------------------------------------------------- /vllm_ascend/meta_registration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/meta_registration.py -------------------------------------------------------------------------------- /vllm_ascend/model_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/model_loader/netloader/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/executor/elastic_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/model_loader/netloader/executor/elastic_load.py -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/interaction/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/interaction/elastic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/model_loader/netloader/interaction/elastic.py -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/model_loader/netloader/load.py -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/netloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/model_loader/netloader/netloader.py -------------------------------------------------------------------------------- /vllm_ascend/model_loader/netloader/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/model_loader/netloader/utils.py -------------------------------------------------------------------------------- /vllm_ascend/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/ops/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/activation.py -------------------------------------------------------------------------------- /vllm_ascend/ops/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/attention.py -------------------------------------------------------------------------------- /vllm_ascend/ops/expert_load_balancer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/expert_load_balancer.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/comm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/comm_utils.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/experts_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/experts_selector.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/fused_moe.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/moe_comm_method.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/moe_comm_method.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/moe_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/moe_mlp.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/prepare_finalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/prepare_finalize.py -------------------------------------------------------------------------------- /vllm_ascend/ops/fused_moe/token_dispatcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/fused_moe/token_dispatcher.py -------------------------------------------------------------------------------- /vllm_ascend/ops/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/layernorm.py -------------------------------------------------------------------------------- /vllm_ascend/ops/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/linear.py -------------------------------------------------------------------------------- /vllm_ascend/ops/linear_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/linear_op.py -------------------------------------------------------------------------------- /vllm_ascend/ops/mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/mla.py -------------------------------------------------------------------------------- /vllm_ascend/ops/register_custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/register_custom_ops.py -------------------------------------------------------------------------------- /vllm_ascend/ops/rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/rotary_embedding.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/chunk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/chunk.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/chunk_delta_h.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/chunk_delta_h.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/chunk_o.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/chunk_o.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/cumsum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/cumsum.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/layernorm_guard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/layernorm_guard.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/sigmoid_gating.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/sigmoid_gating.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/solve_tril.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/solve_tril.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/utils.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/fla/wy_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/fla/wy_fast.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/mamba/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/mamba/casual_conv1d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/mamba/casual_conv1d.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/rope.py -------------------------------------------------------------------------------- /vllm_ascend/ops/triton/triton_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/triton/triton_utils.py -------------------------------------------------------------------------------- /vllm_ascend/ops/vocab_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/vocab_parallel_embedding.py -------------------------------------------------------------------------------- /vllm_ascend/ops/weight_prefetch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/ops/weight_prefetch.py -------------------------------------------------------------------------------- /vllm_ascend/patch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/patch_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/patch_config.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/patch_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/patch_distributed.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/patch_ec_connector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/patch_ec_connector.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/patch_mamba_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/patch_mamba_config.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/patch_multiproc_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/patch_multiproc_executor.py -------------------------------------------------------------------------------- /vllm_ascend/patch/platform/patch_sched_yield.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/platform/patch_sched_yield.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_deepseek.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_distributed.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_minicpm.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_multimodal_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_multimodal_merge.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_qwen2_5_omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_qwen2_5_omni.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_qwen2_5_vl.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_qwen3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_qwen3_vl.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_roberta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_roberta.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_rope.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_triton.py -------------------------------------------------------------------------------- /vllm_ascend/patch/worker/patch_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/patch/worker/patch_weight_loader.py -------------------------------------------------------------------------------- /vllm_ascend/platform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/platform.py -------------------------------------------------------------------------------- /vllm_ascend/profiling_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/profiling_config.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/quantization/compressed_tensors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/quantization/compressed_tensors/compressed_tensors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/compressed_tensors/compressed_tensors.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/quant_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/quant_config.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/utils.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/w4a4_flatquant_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/w4a4_flatquant_dynamic.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/w4a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/w4a8_dynamic.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/w8a8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/w8a8.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/w8a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/w8a8_dynamic.py -------------------------------------------------------------------------------- /vllm_ascend/quantization/w8a8_pdmix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/quantization/w8a8_pdmix.py -------------------------------------------------------------------------------- /vllm_ascend/sample/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/sample/logits_processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/sample/logits_processor/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/sample/logits_processor/builtin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/sample/logits_processor/builtin.py -------------------------------------------------------------------------------- /vllm_ascend/sample/rejection_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/sample/rejection_sampler.py -------------------------------------------------------------------------------- /vllm_ascend/sample/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/sample/sampler.py -------------------------------------------------------------------------------- /vllm_ascend/spec_decode/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/spec_decode/__init__.py -------------------------------------------------------------------------------- /vllm_ascend/spec_decode/eagle_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/spec_decode/eagle_proposer.py -------------------------------------------------------------------------------- /vllm_ascend/spec_decode/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/spec_decode/interface.py -------------------------------------------------------------------------------- /vllm_ascend/spec_decode/mtp_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/spec_decode/mtp_proposer.py -------------------------------------------------------------------------------- /vllm_ascend/spec_decode/ngram_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/spec_decode/ngram_proposer.py -------------------------------------------------------------------------------- /vllm_ascend/spec_decode/suffix_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/spec_decode/suffix_proposer.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/models/qwen2.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/models/qwen3_moe.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/torchair_deepseek_mtp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/models/torchair_deepseek_mtp.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/torchair_deepseek_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/models/torchair_deepseek_v2.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/torchair_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/models/torchair_deepseek_v3.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/models/torchair_pangu_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/models/torchair_pangu_moe.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/sequence_parallel.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/shared_weight_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/shared_weight_layer.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/torchair_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/torchair_activation.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/torchair_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/torchair_fused_moe.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/torchair_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/torchair_layernorm.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/torchair_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/torchair_rotary_embedding.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/ops/torchair_vocab_parallel_embedding.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/torchair_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/torchair_attention.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/torchair_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/torchair_mla.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/torchair_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/torchair_model_runner.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/torchair_mtp_proposer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/torchair_mtp_proposer.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/torchair_sfa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/torchair_sfa.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/torchair_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/torchair_worker.py -------------------------------------------------------------------------------- /vllm_ascend/torchair/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/torchair/utils.py -------------------------------------------------------------------------------- /vllm_ascend/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/utils.py -------------------------------------------------------------------------------- /vllm_ascend/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_ascend/worker/block_table.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/worker/block_table.py -------------------------------------------------------------------------------- /vllm_ascend/worker/model_runner_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/worker/model_runner_v1.py -------------------------------------------------------------------------------- /vllm_ascend/worker/npu_input_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/worker/npu_input_batch.py -------------------------------------------------------------------------------- /vllm_ascend/worker/worker_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-ascend/HEAD/vllm_ascend/worker/worker_v1.py --------------------------------------------------------------------------------