├── .clang-format-ignore ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .editorconfig ├── .github ├── CI_PERMISSIONS.json ├── CODEOWNERS ├── FOLDER_README.md ├── ISSUE_TEMPLATE │ ├── 1-bug-report.yml │ └── 2-feature-request.yml ├── MAINTAINER.md ├── labeler.yml ├── pull_request_template.md ├── update_ci_permission.py └── workflows │ ├── auto-format.yml │ ├── auto-tune.yml │ ├── bot-bump-kernel-version-to-sglang.yml │ ├── bot-bump-kernel-version.yml │ ├── bot-bump-sglang-version.yml │ ├── cancel-all-pending-pr-test-runs.yml │ ├── cancel-pr-workflow-on-merge.yml │ ├── ci-failure-monitor.yml │ ├── ci-monitor.yml │ ├── close-inactive-issues.yml │ ├── execute-notebook.yml │ ├── labeler.yml │ ├── lint.yml │ ├── nightly-release-gateway.yml │ ├── nightly-test-amd.yml │ ├── nightly-test-intel.yml │ ├── nightly-test-npu.yml │ ├── nightly-test-nvidia.yml │ ├── open-pr-copy-from-oss.yml │ ├── open-pr-copy-to-oss.yml │ ├── pr-benchmark-rust.yml │ ├── pr-gate.yml │ ├── pr-test-amd.yml │ ├── pr-test-npu.yml │ ├── pr-test-pd-router.yml │ ├── pr-test-rust.yml │ ├── pr-test-xeon.yml │ ├── pr-test-xpu.yml │ ├── pr-test.yml │ ├── release-docker-amd-nightly.yml │ ├── release-docker-amd.yml │ ├── release-docker-cu13.yml │ ├── release-docker-dev.yml │ ├── release-docker-gateway.yml │ ├── release-docker-npu-nightly.yml │ ├── release-docker-npu.yml │ ├── release-docker-xeon.yml │ ├── release-docker.yml │ ├── release-docs.yml │ ├── release-fake-tag.yml │ ├── release-pypi-gateway.yml │ ├── release-pypi.yml │ ├── release-whl-kernel.yml │ ├── slash-command-handler.yml │ └── stress-test.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── 3rdparty └── amd │ ├── profiling │ ├── PROFILING.md │ ├── client.sh │ ├── install_rpd.sh │ ├── loadTracer.sh │ ├── rpd.patch │ ├── rpd_profile_server_enable.patch │ ├── rpd_profile_server_enable_wCPU_activities.patch │ ├── server.sh │ └── torch_profiler.patch │ └── tuning │ ├── TUNING.md │ └── benchmark_moe_rocm.py ├── CODE_OF_CONDUCT.md ├── LICENSE ├── Makefile ├── README.md ├── assets ├── logo.png ├── logo.svg ├── logo_square.png └── logo_square.svg ├── benchmark ├── bench_attention_sink │ └── bench_attention_sink_triton.py ├── bench_in_batch_prefix │ └── bench_in_batch_prefix.py ├── benchmark_batch │ ├── benchmark_batch.py │ └── benchmark_tokenizer.py ├── benchmark_vllm_060 │ └── README.md ├── blog_v0_2 │ ├── 405b_sglang.sh │ ├── 405b_trt.sh │ ├── 405b_vllm.sh │ ├── README.md │ └── config.md ├── boolq │ ├── README.md │ ├── bench_sglang.py │ ├── convert_parquet_to_json.py │ └── parquet_to_json.sh ├── ceval │ ├── README.md │ └── bench_sglang.py ├── deepseek_v3 │ └── README.md ├── dspy │ ├── README.md │ └── bench_dspy_intro.py ├── generative_agents │ ├── README.md │ ├── agent_functions.py │ ├── bench_other.py │ └── bench_sglang.py ├── gpt_oss │ └── README.md ├── gsm8k │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── hellaswag │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── hf3fs │ ├── bench.sh │ ├── bench_client.py │ ├── bench_storage.py │ └── bench_zerocopy.py ├── hicache │ ├── README.md │ ├── bench_long_context.py │ ├── bench_mix.py │ ├── bench_mix.sh │ ├── bench_multiturn.py │ ├── bench_serving.py │ ├── data_processing.py │ ├── download.sh │ ├── nextqa.py │ └── perf.py ├── json_decode_regex │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── build_dataset.py ├── json_jump_forward │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ ├── build_dataset.py │ └── dataset.txt ├── json_schema │ ├── README.md │ └── bench_sglang.py ├── kernels │ ├── all_reduce │ │ ├── benchmark_aiter.py │ │ ├── benchmark_mscclpp.py │ │ └── benchmark_torch_symm_mem.py │ ├── decoding_attention_triton │ │ └── triton_flashinfer_cudnn.py │ ├── deepep │ │ ├── deepep_utils.py │ │ └── tuning_deepep.py │ ├── deepseek │ │ ├── README.md │ │ ├── benchmark_deepgemm_fp8_gemm.py │ │ ├── benchmark_deepgemm_fp8_gemm_blackwell.py │ │ └── benchmark_deepgemm_fp8_group_gemm.py │ ├── elementwise │ │ └── benchmark_concat_mla.py │ ├── flashinfer_allreduce_fusion │ │ ├── README.md │ │ └── benchmark_fused_collective.py │ ├── fused_moe_triton │ │ ├── README.md │ │ ├── benchmark_sglang_fused_moe_triton.py │ │ ├── benchmark_torch_compile_fused_moe.py │ │ ├── benchmark_vllm_vs_sglang_fused_moe_triton.py │ │ ├── common_utils.py │ │ ├── tuning_client.py │ │ ├── tuning_fused_moe_triton.py │ │ ├── tuning_fused_moe_triton_sep.py │ │ └── tuning_text.json │ ├── quantization │ │ ├── README.md │ │ ├── bench_fp4_quant.py │ │ ├── bench_int8_quant.py │ │ └── tuning_block_wise_kernel.py │ ├── scheduler_batch │ │ ├── benchmark_get_last_loc_triton.py │ │ └── benchmark_write_req_to_token_pool_triton.py │ └── sliding_window_attention_triton │ │ └── bench_triton_swa_kernel.py ├── line_retrieval │ ├── README.md │ ├── bench_sglang.py │ └── gen_data.py ├── llava_bench │ ├── README.md │ ├── bench_hf_llava_bench.sh │ ├── bench_hf_mme.sh │ ├── bench_sglang.py │ ├── bench_sglang_mme.sh │ ├── download_images.py │ └── questions.jsonl ├── llm_judge │ ├── README.md │ ├── articles.jsonl │ ├── bench_other.py │ └── bench_sglang.py ├── long_json_decode │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── build_dataset.py ├── lora │ ├── launch_server.py │ └── lora_bench.py ├── mmlu │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── download_data.sh ├── mmmu │ ├── README.md │ ├── bench_hf.py │ ├── bench_sglang.py │ ├── data_utils.py │ ├── eval_utils.py │ └── prompt_format.yaml ├── mtbench │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── bench_sglang_eagle.py ├── multi_chain_reasoning │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── multi_document_qa │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── build_dataset.py ├── multi_turn_chat │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ ├── data_gen.py │ └── long_prompt_multi_turn.py ├── prefill_only │ ├── bench_embeddings.py │ ├── bench_score.py │ └── util.py ├── react │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── hotpotqa_100.jsonl ├── reasoning_benchmark │ ├── README.md │ ├── answer_extraction.py │ ├── bench_sglang.py │ ├── eval_utils.py │ └── figure │ │ ├── Acc_histplot.png │ │ └── SE_numtries.png ├── tip_suggestion │ ├── .gitignore │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ ├── lmql_funcs.py │ └── topic.jsonl ├── tree_of_thought_deep │ ├── README.md │ ├── bench_other.py │ ├── bench_sglang.py │ └── lmql_funcs.py └── tree_of_thought_v0 │ ├── README.md │ ├── bench_other.py │ └── bench_sglang.py ├── docker ├── Dockerfile ├── compose.yaml ├── configs │ ├── .gitconfig │ ├── .tmux.conf │ ├── .vimrc │ ├── .zshrc │ └── yank ├── diffusion.Dockerfile ├── gateway.Dockerfile ├── k8s-sglang-distributed-sts.yaml ├── k8s-sglang-service.yaml ├── npu.Dockerfile ├── rocm.Dockerfile ├── sagemaker.Dockerfile ├── serve ├── xeon.Dockerfile └── xpu.Dockerfile ├── docs ├── Makefile ├── README.md ├── _static │ ├── css │ │ ├── custom_log.css │ │ └── readthedocs.css │ └── image │ │ ├── logo.ico │ │ └── logo.png ├── advanced_features │ ├── attention_backend.md │ ├── checkpoint_engine.md │ ├── deterministic_inference.md │ ├── dp_for_multi_modal_encoder.md │ ├── expert_parallelism.md │ ├── forward_hooks.md │ ├── hicache.rst │ ├── hicache_best_practices.md │ ├── hicache_design.md │ ├── hyperparameter_tuning.md │ ├── lora.ipynb │ ├── observability.md │ ├── pd_disaggregation.md │ ├── quantization.md │ ├── quantized_kv_cache.md │ ├── router.md │ ├── separate_reasoning.ipynb │ ├── server_arguments.md │ ├── speculative_decoding.ipynb │ ├── structured_outputs.ipynb │ ├── structured_outputs_for_reasoning_models.ipynb │ ├── tool_parser.ipynb │ └── vlm_query.ipynb ├── basic_usage │ ├── deepseek_v3.md │ ├── deepseek_v32.md │ ├── gpt_oss.md │ ├── llama4.md │ ├── native_api.ipynb │ ├── offline_engine_api.ipynb │ ├── openai_api.rst │ ├── openai_api_completions.ipynb │ ├── openai_api_embeddings.ipynb │ ├── openai_api_vision.ipynb │ ├── popular_model_usage.rst │ ├── qwen3.md │ ├── qwen3_vl.md │ ├── sampling_params.md │ └── send_request.ipynb ├── conf.py ├── deploy.py ├── developer_guide │ ├── bench_serving.md │ ├── benchmark_and_profiling.md │ ├── contribution_guide.md │ ├── development_guide_using_docker.md │ ├── release_process.md │ └── setup_github_runner.md ├── get_started │ └── install.md ├── index.rst ├── platforms │ ├── amd_gpu.md │ ├── ascend_npu.md │ ├── ascend_npu_deepseek_example.md │ ├── ascend_npu_qwen3_examples.md │ ├── ascend_npu_support.rst │ ├── cpu_server.md │ ├── nvidia_jetson.md │ ├── tpu.md │ └── xpu.md ├── references │ ├── custom_chat_template.md │ ├── environment_variables.md │ ├── faq.md │ ├── frontend │ │ ├── choices_methods.md │ │ ├── frontend_index.rst │ │ └── frontend_tutorial.ipynb │ ├── learn_more.md │ ├── mindspore_models.md │ ├── multi_node_deployment │ │ ├── deploy_on_k8s.md │ │ ├── lws_pd │ │ │ ├── lws-examples │ │ │ │ ├── d-svc.yaml │ │ │ │ ├── d.yaml │ │ │ │ ├── lb.yaml │ │ │ │ ├── p-svc.yaml │ │ │ │ └── p.yaml │ │ │ └── lws_pd_deploy.md │ │ ├── multi_node.md │ │ ├── multi_node_index.rst │ │ └── rbg_pd │ │ │ └── deepseekv32_pd.md │ ├── post_training_integration.md │ ├── production_metrics.md │ ├── production_request_trace.md │ └── torch_compile_cache.md ├── requirements.txt ├── serve.sh ├── supported_models │ ├── classify_models.md │ ├── embedding_models.md │ ├── generative_models.md │ ├── modelscope.md │ ├── multimodal_language_models.md │ ├── rerank_models.md │ ├── reward_models.md │ ├── support_new_models.md │ └── transformers_fallback.md └── wrap_run_llm.py ├── examples ├── assets │ ├── .gitignore │ └── example_image.png ├── chat_template │ ├── tool_chat_template_deepseekr1.jinja │ ├── tool_chat_template_deepseekv3.jinja │ ├── tool_chat_template_deepseekv31.jinja │ ├── tool_chat_template_deepseekv32.jinja │ ├── tool_chat_template_llama3.1_json.jinja │ ├── tool_chat_template_llama4_pythonic.jinja │ └── vision_template_sarashina_vl.jinja ├── checkpoint_engine │ └── update.py ├── frontend_language │ ├── quick_start │ │ ├── anthropic_example_chat.py │ │ ├── anthropic_example_complete.py │ │ ├── azure_openai_example_chat.py │ │ ├── gemini_example_chat.py │ │ ├── gemini_example_complete.py │ │ ├── gemini_example_multimodal_chat.py │ │ ├── images │ │ │ ├── cat.jpeg │ │ │ └── dog.jpeg │ │ ├── local_example_chat.py │ │ ├── local_example_complete.py │ │ ├── local_example_llava_next.py │ │ ├── openai_example_chat.py │ │ ├── openai_example_complete.py │ │ ├── openai_example_n.py │ │ ├── openai_example_o1.py │ │ ├── openrouter_example_chat.py │ │ ├── together_example_chat.py │ │ └── together_example_complete.py │ └── usage │ │ ├── chinese_regex.py │ │ ├── choices_logprob.py │ │ ├── cot_decoding.py │ │ ├── json_decode.py │ │ ├── json_logprobs.py │ │ ├── llava_video │ │ ├── srt_example_llava_v.py │ │ └── srt_example_llava_v.sh │ │ ├── openai_chat_speculative.py │ │ ├── openai_speculative.py │ │ ├── parallel_sample.py │ │ ├── rag_using_parea │ │ └── trace_and_evaluate_rag_using_parea.ipynb │ │ ├── readme_examples.py │ │ ├── sgl_gen_min_tokens.py │ │ ├── streaming.py │ │ └── triton │ │ ├── Dockerfile │ │ ├── README.md │ │ └── models │ │ └── character_generation │ │ ├── 1 │ │ └── model.py │ │ └── config.pbtxt ├── monitoring │ ├── README.md │ ├── docker-compose.yaml │ ├── grafana │ │ ├── dashboards │ │ │ ├── config │ │ │ │ └── dashboard.yaml │ │ │ └── json │ │ │ │ └── sglang-dashboard.json │ │ └── datasources │ │ │ └── datasource.yaml │ ├── opentelemetry.yaml │ ├── prometheus.yaml │ └── tracing_compose.yaml ├── profiler │ └── nsys_profile_tools │ │ ├── README.md │ │ ├── gputrc2graph.py │ │ └── sglang_engine_model.json ├── runtime │ ├── README.md │ ├── engine │ │ ├── custom_server.py │ │ ├── embedding.py │ │ ├── fastapi_engine_inference.py │ │ ├── launch_engine.py │ │ ├── offline_batch_inference.py │ │ ├── offline_batch_inference_async.py │ │ ├── offline_batch_inference_eagle.py │ │ ├── offline_batch_inference_qwen_1m.py │ │ ├── offline_batch_inference_vlm.py │ │ ├── readme.md │ │ ├── save_remote_state.py │ │ └── save_sharded_state.py │ ├── hidden_states │ │ ├── hidden_states_engine.py │ │ └── hidden_states_server.py │ ├── lora.py │ ├── multimodal │ │ ├── llama3_llava_server.py │ │ ├── llava_onevision_server.py │ │ ├── pixtral_server.py │ │ └── qwen_llava_server.py │ ├── multimodal_embedding.py │ ├── openai_chat_with_response_prefill.py │ ├── reward_model.py │ ├── token_in_token_out │ │ ├── token_in_token_out_llm_engine.py │ │ ├── token_in_token_out_llm_server.py │ │ ├── token_in_token_out_vlm_engine.py │ │ └── token_in_token_out_vlm_server.py │ └── vertex_predict.py ├── sagemaker │ └── deploy_and_serve_endpoint.py └── usage │ └── modelopt_quantize_and_export.py ├── python ├── pyproject.toml ├── pyproject_cpu.toml ├── pyproject_other.toml ├── pyproject_xpu.toml └── sglang │ ├── README.md │ ├── __init__.py │ ├── bench_offline_throughput.py │ ├── bench_one_batch.py │ ├── bench_one_batch_server.py │ ├── bench_serving.py │ ├── check_env.py │ ├── cli │ ├── __init__.py │ ├── generate.py │ ├── main.py │ ├── serve.py │ └── utils.py │ ├── compile_deep_gemm.py │ ├── eval │ ├── llama3_eval.py │ └── loogle_eval.py │ ├── global_config.py │ ├── jit_kernel │ ├── .clang-format │ ├── csrc │ │ ├── cuda_wait_value.cuh │ │ └── hicache.cuh │ ├── cuda_wait_value.py │ ├── hicache.py │ ├── include │ │ └── sgl_kernel │ │ │ ├── tensor.h │ │ │ ├── utils.cuh │ │ │ ├── utils.h │ │ │ └── warp.cuh │ └── utils.py │ ├── lang │ ├── api.py │ ├── backend │ │ ├── anthropic.py │ │ ├── base_backend.py │ │ ├── litellm.py │ │ ├── openai.py │ │ ├── runtime_endpoint.py │ │ └── vertexai.py │ ├── chat_template.py │ ├── choices.py │ ├── interpreter.py │ ├── ir.py │ └── tracer.py │ ├── launch_server.py │ ├── multimodal_gen │ ├── README.md │ ├── __init__.py │ ├── benchmarks │ │ └── compare_perf.py │ ├── configs │ │ ├── __init__.py │ │ ├── backend │ │ │ └── vmoba │ │ │ │ ├── wan_1.3B_77_448_832.json │ │ │ │ └── wan_1.3B_77_480_832.json │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── dits │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── flux.py │ │ │ │ ├── hunyuanvideo.py │ │ │ │ ├── qwenimage.py │ │ │ │ ├── stepvideo.py │ │ │ │ ├── wanvideo.py │ │ │ │ └── zimage.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── clip.py │ │ │ │ ├── llama.py │ │ │ │ ├── qwen_image.py │ │ │ │ └── t5.py │ │ │ └── vaes │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── flux.py │ │ │ │ ├── hunyuanvae.py │ │ │ │ ├── qwenimage.py │ │ │ │ ├── stepvideovae.py │ │ │ │ └── wanvae.py │ │ ├── pipeline_configs │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── flux.py │ │ │ ├── flux_finetuned.py │ │ │ ├── hunyuan.py │ │ │ ├── qwen_image.py │ │ │ ├── stepvideo.py │ │ │ ├── wan.py │ │ │ └── zimage.py │ │ ├── sample │ │ │ ├── __init__.py │ │ │ ├── flux.py │ │ │ ├── hunyuan.py │ │ │ ├── qwenimage.py │ │ │ ├── sampling_params.py │ │ │ ├── stepvideo.py │ │ │ ├── teacache.py │ │ │ ├── wan.py │ │ │ └── zimage.py │ │ └── utils.py │ ├── csrc │ │ └── attn │ │ │ └── vmoba_attn │ │ │ ├── README.md │ │ │ ├── setup.py │ │ │ ├── tests │ │ │ └── test_vmoba_attn.py │ │ │ └── vmoba │ │ │ ├── __init__.py │ │ │ └── vmoba.py │ ├── docs │ │ ├── cache_dit.md │ │ ├── cli.md │ │ ├── contributing.md │ │ ├── environment_variables.md │ │ ├── install.md │ │ ├── openai_api.md │ │ ├── support_matrix.md │ │ └── support_new_models.md │ ├── envs.py │ ├── registry.py │ ├── runtime │ │ ├── distributed │ │ │ ├── __init__.py │ │ │ ├── communication_op.py │ │ │ ├── device_communicators │ │ │ │ ├── __init__.py │ │ │ │ ├── base_device_communicator.py │ │ │ │ ├── cpu_communicator.py │ │ │ │ ├── cuda_communicator.py │ │ │ │ ├── pynccl.py │ │ │ │ └── pynccl_wrapper.py │ │ │ ├── group_coordinator.py │ │ │ ├── parallel_state.py │ │ │ └── utils.py │ │ ├── entrypoints │ │ │ ├── __init__.py │ │ │ ├── cli │ │ │ │ ├── __init__.py │ │ │ │ ├── cli_types.py │ │ │ │ ├── generate.py │ │ │ │ ├── main.py │ │ │ │ ├── serve.py │ │ │ │ └── utils.py │ │ │ ├── diffusion_generator.py │ │ │ ├── http_server.py │ │ │ ├── openai │ │ │ │ ├── common_api.py │ │ │ │ ├── image_api.py │ │ │ │ ├── protocol.py │ │ │ │ ├── stores.py │ │ │ │ ├── utils.py │ │ │ │ └── video_api.py │ │ │ └── utils.py │ │ ├── launch_server.py │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── activation.py │ │ │ ├── attention │ │ │ │ ├── STA_configuration.py │ │ │ │ ├── __init__.py │ │ │ │ ├── backends │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── aiter.py │ │ │ │ │ ├── attention_backend.py │ │ │ │ │ ├── flash_attn.py │ │ │ │ │ ├── flash_attn_2.py │ │ │ │ │ ├── sage_attn.py │ │ │ │ │ ├── sage_attn3.py │ │ │ │ │ ├── sdpa.py │ │ │ │ │ ├── sliding_tile_attn.py │ │ │ │ │ ├── video_sparse_attn.py │ │ │ │ │ └── vmoba.py │ │ │ │ ├── layer.py │ │ │ │ └── selector.py │ │ │ ├── custom_op.py │ │ │ ├── layernorm.py │ │ │ ├── linear.py │ │ │ ├── lora │ │ │ │ └── linear.py │ │ │ ├── mlp.py │ │ │ ├── quantization │ │ │ │ ├── __init__.py │ │ │ │ └── base_config.py │ │ │ ├── rotary_embedding.py │ │ │ ├── triton_ops.py │ │ │ ├── usp.py │ │ │ ├── utils.py │ │ │ ├── visual_embedding.py │ │ │ └── vocab_parallel_embedding.py │ │ ├── loader │ │ │ ├── component_loader.py │ │ │ ├── fsdp_load.py │ │ │ ├── utils.py │ │ │ └── weight_utils.py │ │ ├── managers │ │ │ ├── forward_context.py │ │ │ ├── gpu_worker.py │ │ │ └── scheduler.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── dits │ │ │ │ ├── base.py │ │ │ │ ├── causal_wanvideo.py │ │ │ │ ├── flux.py │ │ │ │ ├── flux_2.py │ │ │ │ ├── hunyuanvideo.py │ │ │ │ ├── qwen_image.py │ │ │ │ ├── stepvideo.py │ │ │ │ ├── wanvideo.py │ │ │ │ └── zimage.py │ │ │ ├── encoders │ │ │ │ ├── base.py │ │ │ │ ├── bert.py │ │ │ │ ├── clip.py │ │ │ │ ├── llama.py │ │ │ │ ├── mistral_3.py │ │ │ │ ├── qwen2_5vl.py │ │ │ │ ├── stepllm.py │ │ │ │ ├── t5.py │ │ │ │ └── vision.py │ │ │ ├── parameter.py │ │ │ ├── registry.py │ │ │ ├── schedulers │ │ │ │ ├── base.py │ │ │ │ ├── scheduling_flow_match_euler_discrete.py │ │ │ │ ├── scheduling_flow_unipc_multistep.py │ │ │ │ ├── scheduling_self_forcing_flow_match.py │ │ │ │ └── scheduling_unipc_multistep.py │ │ │ ├── utils.py │ │ │ ├── vaes │ │ │ │ ├── autoencoder.py │ │ │ │ ├── autoencoder_kl_flux2.py │ │ │ │ ├── autoencoder_kl_qwenimage.py │ │ │ │ ├── common.py │ │ │ │ ├── hunyuanvae.py │ │ │ │ ├── stepvideovae.py │ │ │ │ └── wanvae.py │ │ │ └── vision_utils.py │ │ ├── pipelines │ │ │ ├── __init__.py │ │ │ ├── flux.py │ │ │ ├── flux_2.py │ │ │ ├── hunyuan_pipeline.py │ │ │ ├── qwen_image.py │ │ │ ├── stepvideo_pipeline.py │ │ │ ├── wan_causal_dmd_pipeline.py │ │ │ ├── wan_dmd_pipeline.py │ │ │ ├── wan_i2v_dmd_pipeline.py │ │ │ ├── wan_i2v_pipeline.py │ │ │ ├── wan_pipeline.py │ │ │ └── zimage_pipeline.py │ │ ├── pipelines_core │ │ │ ├── __init__.py │ │ │ ├── composed_pipeline_base.py │ │ │ ├── executors │ │ │ │ ├── parallel_executor.py │ │ │ │ ├── pipeline_executor.py │ │ │ │ └── sync_executor.py │ │ │ ├── lora_pipeline.py │ │ │ ├── schedule_batch.py │ │ │ └── stages │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── causal_denoising.py │ │ │ │ ├── conditioning.py │ │ │ │ ├── decoding.py │ │ │ │ ├── denoising.py │ │ │ │ ├── denoising_dmd.py │ │ │ │ ├── encoding.py │ │ │ │ ├── image_encoding.py │ │ │ │ ├── input_validation.py │ │ │ │ ├── latent_preparation.py │ │ │ │ ├── stepvideo_encoding.py │ │ │ │ ├── text_encoding.py │ │ │ │ ├── timestep_preparation.py │ │ │ │ └── validators.py │ │ ├── platforms │ │ │ ├── __init__.py │ │ │ ├── cpu.py │ │ │ ├── cuda.py │ │ │ ├── interface.py │ │ │ ├── mps.py │ │ │ └── rocm.py │ │ ├── scheduler_client.py │ │ ├── server_args.py │ │ ├── sync_scheduler_client.py │ │ └── utils │ │ │ ├── cache_dit_integration.py │ │ │ ├── common.py │ │ │ ├── distributed.py │ │ │ ├── hf_diffusers_utils.py │ │ │ ├── logging_utils.py │ │ │ ├── perf_logger.py │ │ │ └── profiler.py │ ├── test │ │ ├── __init__.py │ │ ├── cli │ │ │ ├── test_generate_common.py │ │ │ ├── test_generate_t2i_perf.py │ │ │ ├── test_generate_t2v_perf.py │ │ │ ├── test_generate_ti2v_perf.py │ │ │ └── test_serve.py │ │ ├── run_suite.py │ │ ├── server │ │ │ ├── conftest.py │ │ │ ├── perf_baselines.json │ │ │ ├── test_server_2_gpu_a.py │ │ │ ├── test_server_2_gpu_b.py │ │ │ ├── test_server_a.py │ │ │ ├── test_server_b.py │ │ │ ├── test_server_common.py │ │ │ ├── test_server_utils.py │ │ │ └── testcase_configs.py │ │ ├── slack_utils.py │ │ ├── test_files │ │ │ ├── launch_flux.json │ │ │ └── launch_wan.json │ │ ├── test_offline_api.py │ │ └── test_utils.py │ ├── third_party │ │ ├── __init__.py │ │ └── pynvml.py │ └── utils.py │ ├── profiler.py │ ├── srt │ ├── batch_invariant_ops │ │ ├── __init__.py │ │ └── batch_invariant_ops.py │ ├── batch_overlap │ │ ├── operations.py │ │ ├── operations_strategy.py │ │ ├── single_batch_overlap.py │ │ └── two_batch_overlap.py │ ├── checkpoint_engine │ │ ├── __init__.py │ │ ├── checkpoint_engine_worker.py │ │ └── update.py │ ├── compilation │ │ ├── backend.py │ │ ├── compilation_config.py │ │ ├── compilation_counter.py │ │ ├── compile.py │ │ ├── compiler_interface.py │ │ ├── cuda_piecewise_backend.py │ │ ├── fix_functionalization.py │ │ ├── fx_utils.py │ │ ├── inductor_pass.py │ │ ├── pass_manager.py │ │ └── piecewise_context_manager.py │ ├── configs │ │ ├── __init__.py │ │ ├── chatglm.py │ │ ├── dbrx.py │ │ ├── deepseek_ocr.py │ │ ├── deepseekvl2.py │ │ ├── device_config.py │ │ ├── dots_ocr.py │ │ ├── dots_vlm.py │ │ ├── exaone.py │ │ ├── falcon_h1.py │ │ ├── internvl.py │ │ ├── janus_pro.py │ │ ├── jet_nemotron.py │ │ ├── jet_vlm.py │ │ ├── kimi_linear.py │ │ ├── kimi_vl.py │ │ ├── kimi_vl_moonvit.py │ │ ├── load_config.py │ │ ├── longcat_flash.py │ │ ├── mamba_utils.py │ │ ├── model_config.py │ │ ├── modelopt_config.py │ │ ├── nano_nemotron_vl.py │ │ ├── nemotron_h.py │ │ ├── olmo3.py │ │ ├── points_v15_chat.py │ │ ├── qwen3_next.py │ │ ├── qwen3_omni.py │ │ ├── qwen3_vl.py │ │ ├── radio.py │ │ ├── step3_vl.py │ │ ├── update_config.py │ │ └── utils.py │ ├── connector │ │ ├── __init__.py │ │ ├── base_connector.py │ │ ├── redis.py │ │ ├── remote_instance.py │ │ ├── s3.py │ │ ├── serde │ │ │ ├── __init__.py │ │ │ ├── safe_serde.py │ │ │ └── serde.py │ │ └── utils.py │ ├── constants.py │ ├── constrained │ │ ├── base_grammar_backend.py │ │ ├── llguidance_backend.py │ │ ├── outlines_backend.py │ │ ├── outlines_jump_forward.py │ │ ├── reasoner_grammar_backend.py │ │ ├── triton_ops │ │ │ └── bitmask_ops.py │ │ ├── utils.py │ │ └── xgrammar_backend.py │ ├── custom_op.py │ ├── debug_utils │ │ ├── __init__.py │ │ ├── dump_comparator.py │ │ ├── dump_loader.py │ │ ├── dumper.py │ │ ├── log_parser.py │ │ ├── model_truncator.py │ │ ├── tensor_dump_forward_hook.py │ │ └── text_comparator.py │ ├── disaggregation │ │ ├── ascend │ │ │ ├── __init__.py │ │ │ ├── conn.py │ │ │ └── transfer_engine.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ └── conn.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── conn.py │ │ │ └── utils.py │ │ ├── decode.py │ │ ├── decode_kvcache_offload_manager.py │ │ ├── decode_schedule_batch_mixin.py │ │ ├── fake │ │ │ ├── __init__.py │ │ │ └── conn.py │ │ ├── kv_events.py │ │ ├── mooncake │ │ │ ├── __init__.py │ │ │ ├── conn.py │ │ │ ├── transfer_engine.py │ │ │ └── utils.py │ │ ├── nixl │ │ │ ├── __init__.py │ │ │ └── conn.py │ │ ├── prefill.py │ │ └── utils.py │ ├── distributed │ │ ├── __init__.py │ │ ├── communication_op.py │ │ ├── device_communicators │ │ │ ├── all_reduce_utils.py │ │ │ ├── cuda_wrapper.py │ │ │ ├── custom_all_reduce.py │ │ │ ├── custom_all_reduce_ops.py │ │ │ ├── custom_all_reduce_utils.py │ │ │ ├── hpu_communicator.py │ │ │ ├── npu_communicator.py │ │ │ ├── pymscclpp.py │ │ │ ├── pynccl.py │ │ │ ├── pynccl_allocator.py │ │ │ ├── pynccl_wrapper.py │ │ │ ├── quick_all_reduce.py │ │ │ ├── shm_broadcast.py │ │ │ ├── torch_symm_mem.py │ │ │ └── xpu_communicator.py │ │ ├── naive_distributed.py │ │ ├── parallel_state.py │ │ └── utils.py │ ├── dllm │ │ ├── algorithm │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── low_confidence.py │ │ └── config.py │ ├── elastic_ep │ │ └── elastic_ep.py │ ├── entrypoints │ │ ├── EngineBase.py │ │ ├── context.py │ │ ├── engine.py │ │ ├── grpc_server.py │ │ ├── harmony_utils.py │ │ ├── http_server.py │ │ ├── http_server_engine.py │ │ ├── openai │ │ │ ├── __init__.py │ │ │ ├── encoding_dsv32.py │ │ │ ├── protocol.py │ │ │ ├── serving_base.py │ │ │ ├── serving_chat.py │ │ │ ├── serving_classify.py │ │ │ ├── serving_completions.py │ │ │ ├── serving_embedding.py │ │ │ ├── serving_rerank.py │ │ │ ├── serving_responses.py │ │ │ ├── serving_score.py │ │ │ ├── serving_tokenize.py │ │ │ ├── tool_server.py │ │ │ ├── usage_processor.py │ │ │ └── utils.py │ │ ├── tool.py │ │ └── warmup.py │ ├── environ.py │ ├── eplb │ │ ├── __init__.py │ │ ├── eplb_algorithms │ │ │ ├── __init__.py │ │ │ ├── deepseek.py │ │ │ ├── deepseek_vec.py │ │ │ └── elasticity_aware.py │ │ ├── eplb_manager.py │ │ ├── eplb_simulator │ │ │ ├── __init__.py │ │ │ └── reader.py │ │ ├── expert_distribution.py │ │ ├── expert_location.py │ │ ├── expert_location_dispatch.py │ │ └── expert_location_updater.py │ ├── function_call │ │ ├── base_format_detector.py │ │ ├── core_types.py │ │ ├── deepseekv31_detector.py │ │ ├── deepseekv32_detector.py │ │ ├── deepseekv3_detector.py │ │ ├── function_call_parser.py │ │ ├── glm4_moe_detector.py │ │ ├── gpt_oss_detector.py │ │ ├── json_array_parser.py │ │ ├── kimik2_detector.py │ │ ├── llama32_detector.py │ │ ├── minimax_m2.py │ │ ├── mistral_detector.py │ │ ├── pythonic_detector.py │ │ ├── qwen25_detector.py │ │ ├── qwen3_coder_detector.py │ │ ├── step3_detector.py │ │ └── utils.py │ ├── grpc │ │ ├── __init__.py │ │ ├── compile_proto.py │ │ ├── grpc_request_manager.py │ │ ├── health_servicer.py │ │ ├── scheduler_launcher.py │ │ ├── sglang_scheduler.proto │ │ ├── sglang_scheduler_pb2.py │ │ ├── sglang_scheduler_pb2.pyi │ │ └── sglang_scheduler_pb2_grpc.py │ ├── hardware_backend │ │ └── npu │ │ │ ├── allocator_npu.py │ │ │ ├── attention │ │ │ ├── ascend_backend.py │ │ │ └── mla_preprocess.py │ │ │ ├── cmo.py │ │ │ ├── graph_runner │ │ │ ├── eagle_draft_extend_npu_graph_runner.py │ │ │ ├── eagle_draft_npu_graph_runner.py │ │ │ └── npu_graph_runner.py │ │ │ ├── memory_pool_npu.py │ │ │ ├── modules │ │ │ └── deepseek_v2_attention_mla_npu.py │ │ │ ├── moe │ │ │ └── topk.py │ │ │ ├── quantization │ │ │ ├── fused_moe_method_npu.py │ │ │ ├── linear_method_npu.py │ │ │ └── modelslim.py │ │ │ └── utils.py │ ├── layers │ │ ├── activation.py │ │ ├── amx_utils.py │ │ ├── attention │ │ │ ├── aiter_backend.py │ │ │ ├── attention_registry.py │ │ │ ├── base_attn_backend.py │ │ │ ├── cutlass_mla_backend.py │ │ │ ├── double_sparsity_backend.py │ │ │ ├── dual_chunk_flashattention_backend.py │ │ │ ├── fla │ │ │ │ ├── chunk.py │ │ │ │ ├── chunk_delta_h.py │ │ │ │ ├── chunk_o.py │ │ │ │ ├── chunk_scaled_dot_kkt.py │ │ │ │ ├── cumsum.py │ │ │ │ ├── fused_gdn_gating.py │ │ │ │ ├── fused_recurrent.py │ │ │ │ ├── fused_sigmoid_gating_recurrent.py │ │ │ │ ├── index.py │ │ │ │ ├── kda.py │ │ │ │ ├── l2norm.py │ │ │ │ ├── layernorm_gated.py │ │ │ │ ├── op.py │ │ │ │ ├── solve_tril.py │ │ │ │ ├── utils.py │ │ │ │ └── wy_fast.py │ │ │ ├── flashattention_backend.py │ │ │ ├── flashinfer_backend.py │ │ │ ├── flashinfer_mla_backend.py │ │ │ ├── flashmla_backend.py │ │ │ ├── hybrid_attn_backend.py │ │ │ ├── hybrid_linear_attn_backend.py │ │ │ ├── intel_amx_backend.py │ │ │ ├── mamba │ │ │ │ ├── causal_conv1d.py │ │ │ │ ├── causal_conv1d_triton.py │ │ │ │ ├── mamba.py │ │ │ │ ├── mamba2_metadata.py │ │ │ │ ├── mixer2_rms_norm_gated.py │ │ │ │ └── ops │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── layernorm_gated.py │ │ │ │ │ ├── mamba_ssm.py │ │ │ │ │ ├── ssd_bmm.py │ │ │ │ │ ├── ssd_chunk_scan.py │ │ │ │ │ ├── ssd_chunk_state.py │ │ │ │ │ ├── ssd_combined.py │ │ │ │ │ └── ssd_state_passing.py │ │ │ ├── merge_state.py │ │ │ ├── nsa │ │ │ │ ├── dequant_k_cache.py │ │ │ │ ├── index_buf_accessor.py │ │ │ │ ├── nsa_indexer.py │ │ │ │ ├── quant_k_cache.py │ │ │ │ ├── tilelang_kernel.py │ │ │ │ ├── transform_index.py │ │ │ │ ├── triton_kernel.py │ │ │ │ └── utils.py │ │ │ ├── nsa_backend.py │ │ │ ├── tbo_backend.py │ │ │ ├── torch_flex_backend.py │ │ │ ├── torch_native_backend.py │ │ │ ├── triton_backend.py │ │ │ ├── triton_ops │ │ │ │ ├── decode_attention.py │ │ │ │ ├── double_sparsity_attention.py │ │ │ │ ├── extend_attention.py │ │ │ │ ├── merge_state.py │ │ │ │ ├── prefill_attention.py │ │ │ │ └── rocm_mla_decode_rope.py │ │ │ ├── trtllm_fp8_kv_kernel.py │ │ │ ├── trtllm_mha_backend.py │ │ │ ├── trtllm_mla_backend.py │ │ │ ├── utils.py │ │ │ ├── vision.py │ │ │ ├── vision_utils.py │ │ │ ├── wave_backend.py │ │ │ ├── wave_ops │ │ │ │ ├── decode_attention.py │ │ │ │ ├── extend_attention.py │ │ │ │ └── prefill_attention.py │ │ │ └── xpu_backend.py │ │ ├── communicator.py │ │ ├── communicator_nsa_cp.py │ │ ├── deep_gemm_wrapper │ │ │ ├── __init__.py │ │ │ ├── compile_utils.py │ │ │ ├── configurer.py │ │ │ └── entrypoint.py │ │ ├── dp_attention.py │ │ ├── elementwise.py │ │ ├── flashinfer_comm_fusion.py │ │ ├── layernorm.py │ │ ├── linear.py │ │ ├── logits_processor.py │ │ ├── model_parallel.py │ │ ├── modelopt_utils.py │ │ ├── moe │ │ │ ├── __init__.py │ │ │ ├── cutlass_moe.py │ │ │ ├── cutlass_moe_params.py │ │ │ ├── cutlass_w4a8_moe.py │ │ │ ├── ep_moe │ │ │ │ ├── __init__.py │ │ │ │ ├── kernels.py │ │ │ │ └── layer.py │ │ │ ├── flashinfer_cutedsl_moe.py │ │ │ ├── fused_moe_native.py │ │ │ ├── fused_moe_triton │ │ │ │ ├── __init__.py │ │ │ │ ├── configs │ │ │ │ │ ├── README.md │ │ │ │ │ ├── triton_3_1_0 │ │ │ │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=16,N=1024,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ │ │ │ ├── E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json │ │ │ │ │ │ ├── E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=1280,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=2560,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=320,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=64,N=640,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=14336,device_name=AMD_Instinct_MI300X.json │ │ │ │ │ │ ├── E=8,N=14336,device_name=AMD_Instinct_MI325X.json │ │ │ │ │ │ ├── E=8,N=14336,device_name=AMD_Radeon_Graphics.json │ │ │ │ │ │ ├── E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=14336,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=AMD_Instinct_MI300X.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=AMD_Instinct_MI325X.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=AMD_Radeon_Graphics.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=1792,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=2048,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=AMD_Instinct_MI300X.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=AMD_Instinct_MI325X.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=AMD_Radeon_Graphics.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=3584,device_name=NVIDIA_L40S.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=4096,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=AMD_Instinct_MI300X.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=AMD_Instinct_MI325X.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=AMD_Radeon_Graphics.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=7168,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json │ │ │ │ │ │ └── E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ ├── triton_3_2_0 │ │ │ │ │ │ ├── E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=128,N=192,device_name=NVIDIA_H20.json │ │ │ │ │ │ ├── E=128,N=192,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H20.json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H20.json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=128,N=96,device_name=NVIDIA_H20.json │ │ │ │ │ │ ├── E=129,N=352,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=160,N=320,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=161,N=192,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json │ │ │ │ │ │ ├── E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ ├── E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json │ │ │ │ │ │ └── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ ├── triton_3_3_0 │ │ │ │ │ │ └── E=16,N=1024,device_name=NVIDIA_B200.json │ │ │ │ │ ├── triton_3_3_1 │ │ │ │ │ │ ├── E=128,N=352,device_name=NVIDIA_RTX_6000_Ada_Generation,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=768,device_name=NVIDIA_H20.json │ │ │ │ │ │ ├── E=160,N=192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=160,N=320,device_name=NVIDIA_H20-3e.json │ │ │ │ │ │ ├── E=160,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=160,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=384,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=384,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=384,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=385,N=128,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=385,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ └── E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ └── triton_3_4_0 │ │ │ │ │ │ ├── E=128,N=1856,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=128,N=1856,device_name=NVIDIA_L40S.json │ │ │ │ │ │ ├── E=128,N=192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=128,N=352,device_name=NVIDIA_RTX_5880_Ada_Generation,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=128,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=128,N=928,device_name=NVIDIA_L40S.json │ │ │ │ │ │ ├── E=129,N=352,device_name=NVIDIA_B200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=129,N=352,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=129,N=704,device_name=NVIDIA_B200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=160,N=384,device_name=NVIDIA_B200,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=161,N=192,device_name=NVIDIA_H200,dtype=fp8_w8a8,per_channel_quant=True.json │ │ │ │ │ │ ├── E=161,N=384,device_name=NVIDIA_RTX_PRO_6000_Blackwell_Max-Q_Workstation_Edition,dtype=fp8_w8a8.json │ │ │ │ │ │ ├── E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=256,device_name=NVIDIA_B200.json │ │ │ │ │ │ ├── E=256,N=256,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=256,N=512,device_name=NVIDIA_B200.json │ │ │ │ │ │ ├── E=256,N=512,device_name=NVIDIA_H20.json │ │ │ │ │ │ ├── E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128]_down.json │ │ │ │ │ │ ├── E=257,N=64,device_name=NVIDIA_A100-SXM4-80GB.json │ │ │ │ │ │ ├── E=384,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=512,N=128,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ ├── E=512,N=128,device_name=NVIDIA_H20-3e.json │ │ │ │ │ │ ├── E=512,N=128,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=512,N=128,device_name=NVIDIA_H800,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ │ │ ├── E=512,N=256,device_name=NVIDIA_B200.json │ │ │ │ │ │ ├── E=512,N=256,device_name=NVIDIA_H20-3e.json │ │ │ │ │ │ ├── E=512,N=256,device_name=NVIDIA_H200.json │ │ │ │ │ │ ├── E=512,N=64,device_name=NVIDIA_H100_80GB_HBM3.json │ │ │ │ │ │ └── E=512,N=64,device_name=NVIDIA_H200.json │ │ │ │ ├── fused_marlin_moe.py │ │ │ │ ├── fused_moe.py │ │ │ │ ├── fused_moe_triton_config.py │ │ │ │ ├── fused_moe_triton_kernels.py │ │ │ │ ├── layer.py │ │ │ │ ├── moe_align_block_size.py │ │ │ │ └── triton_kernels_moe.py │ │ │ ├── kt_ep_wrapper.py │ │ │ ├── moe_runner │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── deep_gemm.py │ │ │ │ ├── runner.py │ │ │ │ ├── triton.py │ │ │ │ └── triton_kernels.py │ │ │ ├── rocm_moe_utils.py │ │ │ ├── router.py │ │ │ ├── token_dispatcher │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── deepep.py │ │ │ │ ├── fuseep.py │ │ │ │ ├── mooncake.py │ │ │ │ └── standard.py │ │ │ ├── topk.py │ │ │ └── utils.py │ │ ├── multimodal.py │ │ ├── parameter.py │ │ ├── pooler.py │ │ ├── quantization │ │ │ ├── __init__.py │ │ │ ├── auto_round.py │ │ │ ├── awq.py │ │ │ ├── awq_triton.py │ │ │ ├── base_config.py │ │ │ ├── blockwise_int8.py │ │ │ ├── compressed_tensors │ │ │ │ ├── README.md │ │ │ │ ├── compressed_tensors.py │ │ │ │ ├── compressed_tensors_moe.py │ │ │ │ ├── schemes │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── compressed_tensors_scheme.py │ │ │ │ │ ├── compressed_tensors_w8a16_fp8.py │ │ │ │ │ ├── compressed_tensors_w8a8_fp8.py │ │ │ │ │ ├── compressed_tensors_w8a8_int8.py │ │ │ │ │ └── compressed_tensors_wNa16.py │ │ │ │ └── utils.py │ │ │ ├── configs │ │ │ │ ├── N=1280,K=5120,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=1536,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=512,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=5120,K=1024,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=5120,K=3200,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=6400,K=5120,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json │ │ │ │ ├── N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json │ │ │ │ └── README.md │ │ │ ├── fp8.py │ │ │ ├── fp8_kernel.py │ │ │ ├── fp8_utils.py │ │ │ ├── fpgemm_fp8.py │ │ │ ├── gguf.py │ │ │ ├── gptq.py │ │ │ ├── int8_kernel.py │ │ │ ├── int8_utils.py │ │ │ ├── kv_cache.py │ │ │ ├── kvfp4_tensor.py │ │ │ ├── marlin_utils.py │ │ │ ├── marlin_utils_fp8.py │ │ │ ├── modelopt_quant.py │ │ │ ├── moe_wna16.py │ │ │ ├── mxfp4.py │ │ │ ├── mxfp4_tensor.py │ │ │ ├── petit.py │ │ │ ├── petit_utils.py │ │ │ ├── qoq.py │ │ │ ├── quark │ │ │ │ ├── __init__.py │ │ │ │ ├── quark.py │ │ │ │ ├── quark_moe.py │ │ │ │ ├── schemes │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── quark_scheme.py │ │ │ │ │ ├── quark_w4a4_mxfp4.py │ │ │ │ │ └── quark_w8a8_fp8.py │ │ │ │ └── utils.py │ │ │ ├── rocm_mxfp4_utils.py │ │ │ ├── unquant.py │ │ │ ├── utils.py │ │ │ ├── w4afp8.py │ │ │ ├── w8a8_fp8.py │ │ │ └── w8a8_int8.py │ │ ├── radix_attention.py │ │ ├── rocm_linear_utils.py │ │ ├── rotary_embedding.py │ │ ├── sampler.py │ │ ├── sparse_pooler.py │ │ ├── torchao_utils.py │ │ ├── utils.py │ │ └── vocab_parallel_embedding.py │ ├── lora │ │ ├── backend │ │ │ ├── ascend_backend.py │ │ │ ├── base_backend.py │ │ │ ├── chunked_backend.py │ │ │ ├── lora_registry.py │ │ │ ├── torch_backend.py │ │ │ └── triton_backend.py │ │ ├── eviction_policy.py │ │ ├── layers.py │ │ ├── lora.py │ │ ├── lora_config.py │ │ ├── lora_manager.py │ │ ├── lora_registry.py │ │ ├── mem_pool.py │ │ ├── torch_ops │ │ │ ├── __init__.py │ │ │ └── lora_ops.py │ │ ├── triton_ops │ │ │ ├── __init__.py │ │ │ ├── chunked_sgmv_expand.py │ │ │ ├── chunked_sgmv_shrink.py │ │ │ ├── gate_up_lora_b.py │ │ │ ├── qkv_lora_b.py │ │ │ ├── sgemm_lora_a.py │ │ │ └── sgemm_lora_b.py │ │ └── utils.py │ ├── managers │ │ ├── async_dynamic_batch_tokenizer.py │ │ ├── async_mm_data_processor.py │ │ ├── cache_controller.py │ │ ├── configure_logging.py │ │ ├── data_parallel_controller.py │ │ ├── detokenizer_manager.py │ │ ├── disagg_service.py │ │ ├── io_struct.py │ │ ├── mm_utils.py │ │ ├── multi_tokenizer_mixin.py │ │ ├── multimodal_processor.py │ │ ├── overlap_utils.py │ │ ├── request_metrics_exporter.py │ │ ├── schedule_batch.py │ │ ├── schedule_policy.py │ │ ├── scheduler.py │ │ ├── scheduler_dp_attn_mixin.py │ │ ├── scheduler_input_blocker.py │ │ ├── scheduler_metrics_mixin.py │ │ ├── scheduler_output_processor_mixin.py │ │ ├── scheduler_pp_mixin.py │ │ ├── scheduler_profiler_mixin.py │ │ ├── scheduler_recv_skipper.py │ │ ├── scheduler_runtime_checker_mixin.py │ │ ├── scheduler_update_weights_mixin.py │ │ ├── session_controller.py │ │ ├── template_manager.py │ │ ├── tokenizer_communicator_mixin.py │ │ ├── tokenizer_manager.py │ │ ├── tp_worker.py │ │ └── utils.py │ ├── mem_cache │ │ ├── allocator.py │ │ ├── base_prefix_cache.py │ │ ├── cache_init_params.py │ │ ├── chunk_cache.py │ │ ├── common.py │ │ ├── cpp_radix_tree │ │ │ ├── .clang-format │ │ │ ├── common.h │ │ │ ├── radix_tree.py │ │ │ ├── tree_v2.cpp │ │ │ ├── tree_v2.h │ │ │ ├── tree_v2_binding.cpp │ │ │ ├── tree_v2_debug.cpp │ │ │ ├── tree_v2_impl.h │ │ │ └── tree_v2_node.h │ │ ├── evict_policy.py │ │ ├── flush_cache.py │ │ ├── hicache_storage.py │ │ ├── hiradix_cache.py │ │ ├── mamba_radix_cache.py │ │ ├── memory_pool.py │ │ ├── memory_pool_host.py │ │ ├── multimodal_cache.py │ │ ├── radix_cache.py │ │ ├── radix_cache_cpp.py │ │ ├── storage │ │ │ ├── __init__.py │ │ │ ├── aibrix_kvcache │ │ │ │ ├── README.md │ │ │ │ ├── aibrix_kvcache_storage.py │ │ │ │ └── unit_test.py │ │ │ ├── backend_factory.py │ │ │ ├── eic │ │ │ │ ├── README.md │ │ │ │ ├── eic_storage.py │ │ │ │ └── test_unit.py │ │ │ ├── hf3fs │ │ │ │ ├── docs │ │ │ │ │ ├── README.md │ │ │ │ │ ├── deploy_sglang_3fs_multinode.md │ │ │ │ │ └── setup_usrbio_client.md │ │ │ │ ├── hf3fs_client.py │ │ │ │ ├── hf3fs_usrbio_client.py │ │ │ │ ├── hf3fs_utils.cpp │ │ │ │ ├── mini_3fs_metadata_server.py │ │ │ │ ├── storage_hf3fs.py │ │ │ │ └── test_hf3fs_utils.py │ │ │ ├── lmcache │ │ │ │ ├── README.md │ │ │ │ ├── example_config.yaml │ │ │ │ ├── lmc_radix_cache.py │ │ │ │ └── unit_test.py │ │ │ ├── mooncake_store │ │ │ │ ├── README.md │ │ │ │ ├── mooncake_store.py │ │ │ │ └── test_mooncake_store.py │ │ │ └── nixl │ │ │ │ ├── README.md │ │ │ │ ├── hicache_nixl.py │ │ │ │ ├── nixl_utils.py │ │ │ │ └── test_hicache_nixl_storage.py │ │ ├── swa_radix_cache.py │ │ └── utils.py │ ├── metrics │ │ ├── collector.py │ │ ├── func_timer.py │ │ ├── label_transform.py │ │ ├── startup_func_log_and_timer.py │ │ └── utils.py │ ├── model_executor │ │ ├── cpu_graph_runner.py │ │ ├── cuda_graph_runner.py │ │ ├── forward_batch_info.py │ │ ├── hook_manager.py │ │ ├── input_buffers.py │ │ ├── mindspore_runner.py │ │ ├── model_runner.py │ │ └── piecewise_cuda_graph_runner.py │ ├── model_loader │ │ ├── __init__.py │ │ ├── loader.py │ │ ├── remote_instance_weight_loader_utils.py │ │ ├── utils.py │ │ ├── weight_utils.py │ │ └── weight_validation.py │ ├── models │ │ ├── apertus.py │ │ ├── arcee.py │ │ ├── baichuan.py │ │ ├── bailing_moe.py │ │ ├── bailing_moe_nextn.py │ │ ├── bert.py │ │ ├── chatglm.py │ │ ├── clip.py │ │ ├── commandr.py │ │ ├── dbrx.py │ │ ├── deepseek.py │ │ ├── deepseek_janus_pro.py │ │ ├── deepseek_nextn.py │ │ ├── deepseek_ocr.py │ │ ├── deepseek_v2.py │ │ ├── deepseek_vl2.py │ │ ├── dots_ocr.py │ │ ├── dots_vlm.py │ │ ├── dots_vlm_vit.py │ │ ├── ernie4.py │ │ ├── ernie4_eagle.py │ │ ├── exaone.py │ │ ├── falcon_h1.py │ │ ├── gemma.py │ │ ├── gemma2.py │ │ ├── gemma2_reward.py │ │ ├── gemma3_causal.py │ │ ├── gemma3_mm.py │ │ ├── gemma3n_audio.py │ │ ├── gemma3n_causal.py │ │ ├── gemma3n_mm.py │ │ ├── glm4.py │ │ ├── glm4_moe.py │ │ ├── glm4_moe_nextn.py │ │ ├── glm4v.py │ │ ├── glm4v_moe.py │ │ ├── gpt2.py │ │ ├── gpt_bigcode.py │ │ ├── gpt_oss.py │ │ ├── granite.py │ │ ├── granitemoe.py │ │ ├── grok.py │ │ ├── hunyuan.py │ │ ├── idefics2.py │ │ ├── internlm2.py │ │ ├── internlm2_reward.py │ │ ├── interns1.py │ │ ├── internvl.py │ │ ├── jet_nemotron.py │ │ ├── jet_vlm.py │ │ ├── kimi_linear.py │ │ ├── kimi_vl.py │ │ ├── kimi_vl_moonvit.py │ │ ├── llada2.py │ │ ├── llama.py │ │ ├── llama4.py │ │ ├── llama_classification.py │ │ ├── llama_eagle.py │ │ ├── llama_eagle3.py │ │ ├── llama_embedding.py │ │ ├── llama_reward.py │ │ ├── llava.py │ │ ├── llavavid.py │ │ ├── longcat_flash.py │ │ ├── longcat_flash_nextn.py │ │ ├── mimo.py │ │ ├── mimo_mtp.py │ │ ├── mindspore.py │ │ ├── minicpm.py │ │ ├── minicpm3.py │ │ ├── minicpmo.py │ │ ├── minicpmv.py │ │ ├── minimax_m2.py │ │ ├── ministral3.py │ │ ├── mistral.py │ │ ├── mistral_large_3.py │ │ ├── mistral_large_3_eagle.py │ │ ├── mixtral.py │ │ ├── mixtral_quant.py │ │ ├── mllama.py │ │ ├── mllama4.py │ │ ├── nano_nemotron_vl.py │ │ ├── nemotron_h.py │ │ ├── nemotron_nas.py │ │ ├── nvila.py │ │ ├── nvila_lite.py │ │ ├── olmo.py │ │ ├── olmo2.py │ │ ├── olmoe.py │ │ ├── opt.py │ │ ├── orion.py │ │ ├── persimmon.py │ │ ├── phi.py │ │ ├── phi3_small.py │ │ ├── phi4mm.py │ │ ├── phi4mm_audio.py │ │ ├── phi4mm_utils.py │ │ ├── phimoe.py │ │ ├── pixtral.py │ │ ├── points_v15_chat.py │ │ ├── qwen.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ ├── qwen2_audio.py │ │ ├── qwen2_classification.py │ │ ├── qwen2_eagle.py │ │ ├── qwen2_moe.py │ │ ├── qwen2_rm.py │ │ ├── qwen2_vl.py │ │ ├── qwen3.py │ │ ├── qwen3_classification.py │ │ ├── qwen3_moe.py │ │ ├── qwen3_next.py │ │ ├── qwen3_next_mtp.py │ │ ├── qwen3_omni_moe.py │ │ ├── qwen3_vl.py │ │ ├── qwen3_vl_moe.py │ │ ├── radio.py │ │ ├── registry.py │ │ ├── roberta.py │ │ ├── sarashina2_vision.py │ │ ├── siglip.py │ │ ├── solar.py │ │ ├── stablelm.py │ │ ├── starcoder2.py │ │ ├── step3_vl.py │ │ ├── teleflm.py │ │ ├── torch_native_llama.py │ │ ├── transformers.py │ │ ├── utils.py │ │ ├── xverse.py │ │ ├── xverse_moe.py │ │ └── yivl.py │ ├── multimodal │ │ ├── customized_mm_processor_utils.py │ │ ├── internvl_utils.py │ │ ├── mm_utils.py │ │ └── processors │ │ │ ├── base_processor.py │ │ │ ├── clip.py │ │ │ ├── deepseek_ocr.py │ │ │ ├── deepseek_vl_v2.py │ │ │ ├── dots_vlm.py │ │ │ ├── gemma3.py │ │ │ ├── gemma3n.py │ │ │ ├── glm4v.py │ │ │ ├── internvl.py │ │ │ ├── janus_pro.py │ │ │ ├── kimi_vl.py │ │ │ ├── llava.py │ │ │ ├── minicpm.py │ │ │ ├── mlama.py │ │ │ ├── mllama4.py │ │ │ ├── nano_nemotron_vl.py │ │ │ ├── nvila.py │ │ │ ├── phi4mm.py │ │ │ ├── pixtral.py │ │ │ ├── points_v15_chat.py │ │ │ ├── qwen_audio.py │ │ │ ├── qwen_vl.py │ │ │ ├── sarashina2_vision.py │ │ │ └── step3_vl.py │ ├── multiplex │ │ ├── multiplexing_mixin.py │ │ └── pdmux_context.py │ ├── parser │ │ ├── code_completion_parser.py │ │ ├── conversation.py │ │ ├── harmony_parser.py │ │ ├── jinja_template_utils.py │ │ └── reasoning_parser.py │ ├── sampling │ │ ├── custom_logit_processor.py │ │ ├── penaltylib │ │ │ ├── __init__.py │ │ │ ├── frequency_penalty.py │ │ │ ├── min_new_tokens.py │ │ │ ├── orchestrator.py │ │ │ └── presence_penalty.py │ │ ├── sampling_batch_info.py │ │ └── sampling_params.py │ ├── server_args.py │ ├── server_args_config_parser.py │ ├── speculative │ │ ├── base_spec_worker.py │ │ ├── cpp_ngram │ │ │ ├── .clang-format │ │ │ ├── ngram.cpp │ │ │ ├── ngram.h │ │ │ ├── ngram_cache.py │ │ │ ├── ngram_cache_binding.cpp │ │ │ ├── param.h │ │ │ └── queue.h │ │ ├── draft_utils.py │ │ ├── eagle_draft_cuda_graph_runner.py │ │ ├── eagle_draft_extend_cuda_graph_runner.py │ │ ├── eagle_info.py │ │ ├── eagle_info_v2.py │ │ ├── eagle_utils.py │ │ ├── eagle_worker.py │ │ ├── eagle_worker_v2.py │ │ ├── ngram_info.py │ │ ├── ngram_worker.py │ │ ├── spec_info.py │ │ ├── spec_utils.py │ │ └── standalone_worker.py │ ├── tokenizer │ │ └── tiktoken_tokenizer.py │ ├── tracing │ │ └── trace.py │ ├── utils │ │ ├── __init__.py │ │ ├── aio_rwlock.py │ │ ├── bench_utils.py │ │ ├── common.py │ │ ├── cuda_ipc_transport_utils.py │ │ ├── hf_transformers_utils.py │ │ ├── host_shared_memory.py │ │ ├── mistral_utils.py │ │ ├── numa_utils.py │ │ ├── nvtx_pytorch_hooks.py │ │ ├── offloader.py │ │ ├── patch_torch.py │ │ ├── poll_based_barrier.py │ │ ├── profile_merger.py │ │ ├── profile_utils.py │ │ ├── rpd_utils.py │ │ ├── slow_rank_detector.py │ │ ├── torch_memory_saver_adapter.py │ │ └── weight_checker.py │ └── weight_sync │ │ ├── tensor_bucket.py │ │ └── utils.py │ ├── test │ ├── __init__.py │ ├── attention │ │ ├── __init__.py │ │ ├── test_flashattn_backend.py │ │ ├── test_flashattn_mla_backend.py │ │ ├── test_prefix_chunk_info.py │ │ └── test_trtllm_mla_backend.py │ ├── ci │ │ ├── ci_register.py │ │ ├── ci_stress_utils.py │ │ └── ci_utils.py │ ├── doc_patch.py │ ├── few_shot_gsm8k.py │ ├── few_shot_gsm8k_engine.py │ ├── get_logits_ut.py │ ├── gsm8k_mixin.py │ ├── kits │ │ ├── ebnf_constrained_kit.py │ │ ├── json_constrained_kit.py │ │ ├── matched_stop_kit.py │ │ ├── radix_cache_server_kit.py │ │ └── regex_constrained_kit.py │ ├── kl_test_utils.py │ ├── long_prompt.txt │ ├── longbench_v2 │ │ ├── __init__.py │ │ ├── longbench_v2_evaluation.md │ │ ├── test_longbench_v2_eval.py │ │ ├── validate_longbench_v2.py │ │ └── validate_longbench_v2_standalone.py │ ├── mmmu_vlm_mixin.py │ ├── nightly_bench_utils.py │ ├── run_eval.py │ ├── runners.py │ ├── send_one.py │ ├── simple_eval_common.py │ ├── simple_eval_gpqa.py │ ├── simple_eval_humaneval.py │ ├── simple_eval_longbench_v2.py │ ├── simple_eval_math.py │ ├── simple_eval_mgsm.py │ ├── simple_eval_mmlu.py │ ├── simple_eval_mmmu_vlm.py │ ├── speculative │ │ └── test_spec_utils.py │ ├── test_activation.py │ ├── test_block_fp8.py │ ├── test_block_fp8_deep_gemm_blackwell.py │ ├── test_custom_ops.py │ ├── test_cutlass_moe.py │ ├── test_cutlass_w16a16_moe.py │ ├── test_cutlass_w4a8_moe.py │ ├── test_deepep_utils.py │ ├── test_deterministic.py │ ├── test_deterministic_utils.py │ ├── test_disaggregation_utils.py │ ├── test_dynamic_grad_mode.py │ ├── test_kvfp4_quant_dequant.py │ ├── test_layernorm.py │ ├── test_marlin_moe.py │ ├── test_marlin_utils.py │ ├── test_programs.py │ └── test_utils.py │ ├── utils.py │ └── version.py ├── scripts ├── check_vram_clear.sh ├── ci │ ├── amd_ci_exec.sh │ ├── amd_ci_install_dependency.sh │ ├── amd_ci_start_container.sh │ ├── ci_install_deepep.sh │ ├── ci_install_dependency.sh │ ├── ci_install_rust.sh │ ├── ci_start_disaggregation_servers.sh │ ├── cleanup_hf_cache.py │ ├── npu_ci_install_dependency.sh │ ├── npu_log_print.sh │ ├── prepare_runner.sh │ ├── publish_traces.py │ ├── slash_command_handler.py │ └── test_rccl_multi_gpu.py ├── ci_monitor │ ├── README.md │ ├── ci_analyzer.py │ ├── ci_analyzer_balance.py │ ├── ci_analyzer_perf.py │ └── ci_failures_analysis.py ├── code_sync │ ├── copy_from_oss.py │ ├── copy_to_oss.py │ ├── guideline.md │ └── install_github_cli.sh ├── convert_otel_2_perfetto.py ├── ensure_vram_clear.sh ├── export_deepseek_nextn.py ├── killall_sglang.sh ├── playground │ ├── bench_speculative.py │ ├── disaggregation │ │ ├── cli-logprob.py │ │ ├── cli-so.py │ │ └── cli.py │ ├── frontend_reasoning.ipynb │ ├── load_tokenizer.py │ ├── long_context_example.py │ ├── lora │ │ ├── analyzer.py │ │ ├── lora_hf_play.py │ │ └── lora_vllm_play.py │ ├── reference_hf.py │ ├── replay_request_dump.py │ └── router │ │ ├── test_tree.py │ │ └── tree.py ├── release │ ├── README.md │ ├── bump_kernel_version.py │ ├── bump_kernel_version_to_sglang.py │ ├── bump_sglang_version.py │ ├── check_kernel_version_to_sglang.py │ ├── commit_and_pr.sh │ ├── commit_and_pr_kernel_to_sglang.sh │ ├── test_utils.py │ └── utils.py ├── sort_testcases_alphabetically.py ├── update_kernel_whl_index.py └── version_branch_to_tag.sh ├── sgl-kernel ├── .clang-format ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── THIRDPARTYNOTICES.txt ├── analyze_whl_kernel_sizes.py ├── benchmark │ ├── bench_activation.py │ ├── bench_awq_dequant.py │ ├── bench_cutlass_mla.py │ ├── bench_dsv3_fused_a_gemm.py │ ├── bench_dsv3_router_gemm.py │ ├── bench_es_fp8_blockwise_grouped_gemm.py │ ├── bench_fp4_gemm.py │ ├── bench_fp8_blockwise_gemm.py │ ├── bench_fp8_blockwise_group_gemm.py │ ├── bench_fp8_gemm.py │ ├── bench_int8_gemm.py │ ├── bench_kimi_k2_moe_fused_gate.py │ ├── bench_moe_align_block_size.py │ ├── bench_moe_ep_post_reorder.py │ ├── bench_moe_fused_gate.py │ ├── bench_moe_topk_sigmoid.py │ ├── bench_moe_topk_softmax.py │ ├── bench_mrope.py │ ├── bench_nvfp4_scaled_gemm.py │ ├── bench_per_tensor_quant_fp8.py │ ├── bench_per_token_group_quant_8bit.py │ ├── bench_per_token_quant_fp8.py │ ├── bench_qserve_w4a8_gemm.py │ ├── bench_rmsnorm.py │ ├── bench_rotary_embedding.py │ ├── bench_sum_scale.py │ └── bench_top_k_top_p_sampling.py ├── build.sh ├── cmake │ ├── flashmla.cmake │ └── utils.cmake ├── csrc │ ├── allreduce │ │ ├── custom_all_reduce.cu │ │ ├── custom_all_reduce.cuh │ │ ├── custom_all_reduce.hip │ │ ├── custom_all_reduce_hip.cuh │ │ ├── mscclpp_allreduce.cu │ │ ├── mscclpp_allreduce.cuh │ │ ├── quick_all_reduce.cu │ │ ├── quick_all_reduce.cuh │ │ ├── quick_all_reduce.h │ │ ├── quick_all_reduce_base.h │ │ └── test_mscclpp_allreduce.cu │ ├── attention │ │ ├── cascade.cu │ │ ├── cutlass_mla_kernel.cu │ │ ├── cutlass_sm100_mla │ │ │ ├── device │ │ │ │ └── sm100_mla.hpp │ │ │ └── kernel │ │ │ │ ├── sm100_fmha_mla_reduction.hpp │ │ │ │ ├── sm100_fmha_mla_tma_warpspecialized.hpp │ │ │ │ └── sm100_mla_tile_scheduler.hpp │ │ ├── merge_attn_states.cu │ │ └── vertical_slash_index.cu │ ├── common_extension.cc │ ├── common_extension_rocm.cc │ ├── cpu │ │ ├── CMakeLists.txt │ │ ├── activation.cpp │ │ ├── bmm.cpp │ │ ├── common.h │ │ ├── decode.cpp │ │ ├── extend.cpp │ │ ├── gemm.cpp │ │ ├── gemm.h │ │ ├── gemm_fp8.cpp │ │ ├── gemm_int8.cpp │ │ ├── interface.cpp │ │ ├── mamba │ │ │ ├── conv.cpp │ │ │ └── fla.cpp │ │ ├── model │ │ │ └── qwen3.cpp │ │ ├── moe.cpp │ │ ├── moe_fp8.cpp │ │ ├── moe_int8.cpp │ │ ├── norm.cpp │ │ ├── numa_utils.cpp │ │ ├── qkv_proj.cpp │ │ ├── rope.cpp │ │ ├── shm.cpp │ │ ├── shm.h │ │ ├── topk.cpp │ │ ├── torch_extension_cpu.cpp │ │ ├── vec.h │ │ └── vec_pack.h │ ├── cutlass_extensions │ │ ├── common.hpp │ │ ├── detail │ │ │ └── collective │ │ │ │ └── mixed_input_utils.hpp │ │ ├── epilogue │ │ │ └── epilogue_per_row_per_col_scale.h │ │ └── gemm │ │ │ ├── collective │ │ │ ├── builders │ │ │ │ └── sm90_gmma_builder_mixed_input.inl │ │ │ ├── collective_builder_mixed_input.hpp │ │ │ ├── collective_mma_array_mixed_input.hpp │ │ │ └── sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp │ │ │ ├── cutlass_gemm_caller.cuh │ │ │ ├── dispatch_policy.hpp │ │ │ ├── fp8_blockwise_gemm_sm90_dispatch.cuh │ │ │ ├── gemm_universal_base_compat.h │ │ │ └── gemm_with_epilogue_visitor.h │ ├── elementwise │ │ ├── activation.cu │ │ ├── cast.cu │ │ ├── concat_mla.cu │ │ ├── copy.cu │ │ ├── fused_add_rms_norm_kernel.cu │ │ ├── pos_enc.cu │ │ ├── pos_enc.cuh │ │ ├── rope.cu │ │ ├── topk.cu │ │ └── utils.cuh │ ├── expert_specialization │ │ ├── es_fp8_blockwise.cu │ │ ├── es_fp8_blockwise_functor.cuh │ │ ├── es_fp8_blockwise_launcher.cuh │ │ ├── es_fp8_blockwise_traits.cuh │ │ ├── es_sm100_mxfp8_blockscaled.cu │ │ ├── es_sm100_mxfp8_blockscaled_functor.cuh │ │ ├── es_sm100_mxfp8_blockscaled_group_quant.cu │ │ ├── es_sm100_mxfp8_blockscaled_group_quant.cuh │ │ ├── es_sm100_mxfp8_blockscaled_launcher.cuh │ │ └── es_sm100_mxfp8_blockscaled_traits.cuh │ ├── flash_extension.cc │ ├── flashmla_extension.cc │ ├── gemm │ │ ├── awq_kernel.cu │ │ ├── bmm_fp8.cu │ │ ├── dsv3_fused_a_gemm.cu │ │ ├── dsv3_router_gemm_bf16_out.cu │ │ ├── dsv3_router_gemm_entry.cu │ │ ├── dsv3_router_gemm_float_out.cu │ │ ├── fp8_blockwise_gemm_kernel.cu │ │ ├── fp8_gemm_kernel.cu │ │ ├── gptq │ │ │ ├── compat.cuh │ │ │ ├── gptq_kernel.cu │ │ │ ├── matrix_view.cuh │ │ │ ├── qdq_2.cuh │ │ │ ├── qdq_3.cuh │ │ │ ├── qdq_4.cuh │ │ │ ├── qdq_8.cuh │ │ │ └── qdq_util.cuh │ │ ├── int8_gemm_kernel.cu │ │ ├── marlin │ │ │ ├── awq_marlin_repack.cu │ │ │ ├── dequant.h │ │ │ ├── gptq_marlin.cu │ │ │ ├── gptq_marlin_repack.cu │ │ │ ├── kernel.h │ │ │ ├── marlin.cuh │ │ │ ├── marlin_dtypes.cuh │ │ │ └── marlin_template.h │ │ ├── math.hpp │ │ ├── nvfp4_expert_quant.cu │ │ ├── nvfp4_quant.cuh │ │ ├── nvfp4_quant_entry.cu │ │ ├── nvfp4_quant_kernels.cu │ │ ├── nvfp4_scaled_mm_entry.cu │ │ ├── nvfp4_scaled_mm_kernels.cu │ │ ├── per_tensor_quant_fp8.cu │ │ ├── per_token_group_quant_8bit.cu │ │ ├── per_token_group_quant_8bit_v2.cu │ │ ├── per_token_quant_fp8.cu │ │ ├── qserve_w4a8_per_chn_gemm.cu │ │ └── qserve_w4a8_per_group_gemm.cu │ ├── grammar │ │ └── apply_token_bitmask_inplace_cuda.cu │ ├── kvcacheio │ │ └── transfer.cu │ ├── mamba │ │ ├── causal_conv1d.cu │ │ └── causal_conv1d.h │ ├── memory │ │ ├── store.cu │ │ └── weak_ref_tensor.cpp │ ├── moe │ │ ├── cutlass_moe │ │ │ └── w4a8 │ │ │ │ ├── scaled_mm_entry.cu │ │ │ │ ├── w4a8_get_group_starts.cuh │ │ │ │ ├── w4a8_grouped_mm_c3x.cu │ │ │ │ ├── w4a8_grouped_mm_c3x.cuh │ │ │ │ └── w4a8_moe_data.cu │ │ ├── cutlass_moe_helper.cu │ │ ├── fp8_blockwise_moe_kernel.cu │ │ ├── fused_qknorm_rope_kernel.cu │ │ ├── kimi_k2_moe_fused_gate.cu │ │ ├── marlin_moe_wna16 │ │ │ ├── generate_kernels.py │ │ │ ├── kernel.h │ │ │ ├── kernel_bf16_ku4.cuh │ │ │ ├── kernel_bf16_ku4b8.cuh │ │ │ ├── kernel_bf16_ku8b128.cuh │ │ │ ├── kernel_fp16_ku4.cuh │ │ │ ├── kernel_fp16_ku4b8.cuh │ │ │ ├── kernel_fp16_ku8b128.cuh │ │ │ ├── kernel_marlin.cuh │ │ │ ├── marlin_template.h │ │ │ └── ops.cu │ │ ├── moe_align_kernel.cu │ │ ├── moe_fused_gate.cu │ │ ├── moe_sum.cu │ │ ├── moe_sum_reduce.cu │ │ ├── moe_topk_sigmoid_kernels.cu │ │ ├── moe_topk_softmax_kernels.cu │ │ ├── nvfp4_blockwise_moe.cu │ │ └── prepare_moe_input.cu │ ├── quantization │ │ └── gguf │ │ │ ├── dequantize.cuh │ │ │ ├── ggml-common.h │ │ │ ├── gguf_kernel.cu │ │ │ ├── mmq.cuh │ │ │ ├── mmvq.cuh │ │ │ ├── moe.cuh │ │ │ ├── moe_vec.cuh │ │ │ └── vecdotq.cuh │ ├── spatial │ │ ├── cuda_utils.h │ │ ├── greenctx_stream.cu │ │ └── greenctx_stream.h │ ├── spatial_extension.cc │ └── speculative │ │ ├── eagle_utils.cu │ │ ├── ngram_utils.cu │ │ ├── packbit.cu │ │ ├── speculative_sampling.cu │ │ └── speculative_sampling.cuh ├── include │ ├── hip │ │ ├── hip_act_and_mul.cuh │ │ ├── hip_math_def.h │ │ ├── hip_vec_dtypes.h │ │ └── impl │ │ │ ├── hip_vec_bf16_impl.h │ │ │ ├── hip_vec_fp32_impl.h │ │ │ └── hip_vec_half_impl.h │ ├── pytorch_extension_utils_rocm.h │ ├── scalar_type.hpp │ ├── sgl_flash_kernel_ops.h │ ├── sgl_kernel_ops.h │ ├── sgl_kernel_torch_shim.h │ └── utils.h ├── kernel-runner-setup.sh ├── pyproject.toml ├── pyproject_cpu.toml ├── pyproject_rocm.toml ├── python │ └── sgl_kernel │ │ ├── __init__.py │ │ ├── _fa4_interface.py │ │ ├── allreduce.py │ │ ├── attention.py │ │ ├── cutlass_moe.py │ │ ├── elementwise.py │ │ ├── expert_specialization.py │ │ ├── flash_attn.py │ │ ├── flash_mla.py │ │ ├── fused_moe.py │ │ ├── gemm.py │ │ ├── grammar.py │ │ ├── hadamard.py │ │ ├── kvcacheio.py │ │ ├── load_utils.py │ │ ├── mamba.py │ │ ├── marlin.py │ │ ├── memory.py │ │ ├── moe.py │ │ ├── quantization │ │ ├── __init__.py │ │ └── gguf.py │ │ ├── sampling.py │ │ ├── scalar_type.py │ │ ├── sparse_flash_attn.py │ │ ├── spatial.py │ │ ├── speculative.py │ │ ├── test_utils.py │ │ ├── testing │ │ ├── __init__.py │ │ └── rotary_embedding.py │ │ ├── top_k.py │ │ ├── utils.py │ │ └── version.py ├── rename_wheels.sh ├── setup_rocm.py └── tests │ ├── conftest.py │ ├── spatial │ └── test_greenctx_stream.py │ ├── speculative │ ├── test_eagle_utils.py │ ├── test_ngram_utils.py │ └── test_speculative_sampling.py │ ├── test_activation.py │ ├── test_apply_token_bitmask_inplace.py │ ├── test_awq_dequant.py │ ├── test_bmm_fp8.py │ ├── test_causal_conv1d.py │ ├── test_copy.py │ ├── test_custom_allreduce.py │ ├── test_cutlass_mla.py │ ├── test_cutlass_w4a8_moe_mm.py │ ├── test_dsv3_fused_a_gemm.py │ ├── test_dsv3_router_gemm.py │ ├── test_es_fp8_blockwise_moe.py │ ├── test_es_mxfp8_blockscaled_moe.py │ ├── test_flash_attention.py │ ├── test_flash_attention_4.py │ ├── test_flashmla.py │ ├── test_fp4_gemm.py │ ├── test_fp4_quantize.py │ ├── test_fp8_blockwise_gemm.py │ ├── test_fp8_blockwise_moe.py │ ├── test_fp8_gemm.py │ ├── test_fused_qk_norm_rope.py │ ├── test_gguf.py │ ├── test_gptq_kernel.py │ ├── test_hadamard.py │ ├── test_int8_gemm.py │ ├── test_kimi_k2_moe_fused_gate.py │ ├── test_kvcacheio.py │ ├── test_marlin_gemm.py │ ├── test_marlin_repack.py │ ├── test_merge_state.py │ ├── test_merge_state_v2.py │ ├── test_moe_align.py │ ├── test_moe_fused_gate.py │ ├── test_moe_topk_sigmoid.py │ ├── test_moe_topk_softmax.py │ ├── test_mscclpp.py │ ├── test_norm.py │ ├── test_per_tensor_quant_fp8.py │ ├── test_per_token_group_quant_8bit.py │ ├── test_per_token_quant_fp8.py │ ├── test_qserve_w4a8_per_chn_gemm.py │ ├── test_qserve_w4a8_per_group_gemm.py │ ├── test_rotary_embedding.py │ ├── test_sampling.py │ ├── test_sparse_flash_attn.py │ ├── test_topk.py │ ├── test_torch_defaults_reset.py │ └── utils.py ├── sgl-model-gateway ├── .cargo │ └── config.toml ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── benches │ ├── request_processing.rs │ ├── tokenizer_benchmark.rs │ └── tool_parser_benchmark.rs ├── bindings │ ├── golang │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ ├── Makefile │ │ ├── README.md │ │ ├── client.go │ │ ├── client_test.go │ │ ├── examples │ │ │ ├── simple │ │ │ │ ├── main.go │ │ │ │ └── run.sh │ │ │ └── streaming │ │ │ │ ├── main.go │ │ │ │ └── run.sh │ │ ├── integration_test.go │ │ ├── internal │ │ │ └── ffi │ │ │ │ └── client.go │ │ └── src │ │ │ ├── client.rs │ │ │ ├── error.rs │ │ │ ├── grpc_converter.rs │ │ │ ├── lib.rs │ │ │ ├── memory.rs │ │ │ ├── stream.rs │ │ │ ├── tokenizer.rs │ │ │ ├── tool_parser.rs │ │ │ └── utils.rs │ └── python │ │ ├── .coveragerc │ │ ├── Cargo.toml │ │ ├── MANIFEST.in │ │ ├── README.md │ │ ├── pyproject.toml │ │ ├── setup.py │ │ ├── sglang_router │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── cli.py │ │ ├── launch_router.py │ │ ├── launch_server.py │ │ ├── mini_lb.py │ │ ├── router.py │ │ ├── router_args.py │ │ └── version.py │ │ └── src │ │ └── lib.rs ├── build.rs ├── examples │ └── wasm │ │ ├── .gitignore │ │ ├── README.md │ │ ├── wasm-guest-auth │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── build.sh │ │ └── src │ │ │ └── lib.rs │ │ ├── wasm-guest-logging │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── build.sh │ │ └── src │ │ │ └── lib.rs │ │ └── wasm-guest-ratelimit │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── build.sh │ │ └── src │ │ └── lib.rs ├── py_test │ ├── __init__.py │ ├── conftest.py │ ├── e2e_grpc │ │ ├── basic │ │ │ └── test_openai_server.py │ │ ├── conftest.py │ │ ├── features │ │ │ ├── test_enable_thinking.py │ │ │ └── test_reasoning_content.py │ │ ├── fixtures.py │ │ ├── function_call │ │ │ ├── test_openai_function_calling.py │ │ │ └── test_tool_choice.py │ │ ├── pytest.ini │ │ ├── util.py │ │ └── validation │ │ │ ├── test_large_max_new_tokens.py │ │ │ └── test_openai_server_ignore_eos.py │ ├── e2e_http │ │ ├── conftest.py │ │ ├── test_e2e_embeddings.py │ │ ├── test_pd_router.py │ │ └── test_regular_router.py │ ├── e2e_response_api │ │ ├── conftest.py │ │ ├── features │ │ │ ├── test_basic_crud.py │ │ │ ├── test_state_management.py │ │ │ ├── test_streaming_events.py │ │ │ ├── test_structured_output.py │ │ │ └── test_tools_call.py │ │ ├── router_fixtures.py │ │ └── util.py │ ├── fixtures │ │ ├── __init__.py │ │ ├── generate_test_certs.py │ │ ├── mock_worker.py │ │ ├── ports.py │ │ └── router_manager.py │ ├── integration_mock │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── load_balancing │ │ │ ├── __init__.py │ │ │ ├── test_cache_aware.py │ │ │ ├── test_power_of_two.py │ │ │ ├── test_random.py │ │ │ └── test_round_robin.py │ │ ├── test_api_auth.py │ │ ├── test_circuit_breaker.py │ │ ├── test_fault_tolerance.py │ │ ├── test_mtls.py │ │ ├── test_payload_size.py │ │ ├── test_pd_routing.py │ │ ├── test_rate_limiting.py │ │ ├── test_retries.py │ │ ├── test_service_discovery_shim.py │ │ └── test_worker_management.py │ └── unit │ │ ├── __init__.py │ │ ├── test_arg_parser.py │ │ ├── test_router_config.py │ │ ├── test_startup_sequence.py │ │ └── test_validation.py ├── pytest.ini ├── rustfmt.toml ├── scripts │ ├── generate_gateway_release_notes.sh │ ├── generate_vision_golden.py │ ├── run_benchmarks.py │ └── setup-sccache.sh ├── src │ ├── app_context.rs │ ├── config │ │ ├── builder.rs │ │ ├── mod.rs │ │ ├── types.rs │ │ └── validation.rs │ ├── core │ │ ├── circuit_breaker.rs │ │ ├── error.rs │ │ ├── job_queue.rs │ │ ├── metrics_aggregator.rs │ │ ├── mod.rs │ │ ├── model_card.rs │ │ ├── model_type.rs │ │ ├── retry.rs │ │ ├── token_bucket.rs │ │ ├── worker.rs │ │ ├── worker_builder.rs │ │ ├── worker_manager.rs │ │ ├── worker_registry.rs │ │ └── workflow │ │ │ ├── definition.rs │ │ │ ├── engine.rs │ │ │ ├── event.rs │ │ │ ├── executor.rs │ │ │ ├── mod.rs │ │ │ ├── state.rs │ │ │ ├── steps │ │ │ ├── external_worker_registration.rs │ │ │ ├── mcp_registration.rs │ │ │ ├── mod.rs │ │ │ ├── wasm_module_registration.rs │ │ │ ├── wasm_module_removal.rs │ │ │ ├── worker_registration.rs │ │ │ └── worker_removal.rs │ │ │ └── types.rs │ ├── data_connector │ │ ├── common.rs │ │ ├── core.rs │ │ ├── factory.rs │ │ ├── memory.rs │ │ ├── mod.rs │ │ ├── noop.rs │ │ ├── oracle.rs │ │ └── postgres.rs │ ├── grpc_client │ │ ├── mod.rs │ │ ├── sglang_scheduler.rs │ │ └── vllm_engine.rs │ ├── lib.rs │ ├── main.rs │ ├── mcp │ │ ├── config.rs │ │ ├── connection_pool.rs │ │ ├── error.rs │ │ ├── inventory.rs │ │ ├── manager.rs │ │ ├── mod.rs │ │ ├── oauth.rs │ │ ├── proxy.rs │ │ └── tool_args.rs │ ├── middleware.rs │ ├── multimodal │ │ ├── error.rs │ │ ├── media.rs │ │ ├── mod.rs │ │ ├── registry.rs │ │ ├── tracker.rs │ │ ├── types.rs │ │ └── vision │ │ │ ├── image_processor.rs │ │ │ ├── mod.rs │ │ │ ├── preprocessor_config.rs │ │ │ ├── processors │ │ │ ├── llama4_vision.rs │ │ │ ├── llava.rs │ │ │ ├── mod.rs │ │ │ ├── phi3_vision.rs │ │ │ ├── phi4_vision.rs │ │ │ ├── pixtral.rs │ │ │ ├── qwen2_vl.rs │ │ │ ├── qwen3_vl.rs │ │ │ └── qwen_vl_base.rs │ │ │ └── transforms.rs │ ├── observability │ │ ├── events.rs │ │ ├── logging.rs │ │ ├── metrics.rs │ │ ├── mod.rs │ │ └── otel_trace.rs │ ├── policies │ │ ├── bucket.rs │ │ ├── cache_aware.rs │ │ ├── factory.rs │ │ ├── mod.rs │ │ ├── power_of_two.rs │ │ ├── random.rs │ │ ├── registry.rs │ │ ├── round_robin.rs │ │ └── tree.rs │ ├── proto │ │ ├── sglang_scheduler.proto │ │ └── vllm_engine.proto │ ├── protocols │ │ ├── builders │ │ │ ├── chat │ │ │ │ ├── mod.rs │ │ │ │ ├── response.rs │ │ │ │ └── stream_response.rs │ │ │ ├── mod.rs │ │ │ └── responses │ │ │ │ ├── mod.rs │ │ │ │ └── response.rs │ │ ├── chat.rs │ │ ├── classify.rs │ │ ├── common.rs │ │ ├── completion.rs │ │ ├── embedding.rs │ │ ├── event_types.rs │ │ ├── generate.rs │ │ ├── mod.rs │ │ ├── rerank.rs │ │ ├── responses.rs │ │ ├── sampling_params.rs │ │ ├── validated.rs │ │ └── worker_spec.rs │ ├── reasoning_parser │ │ ├── README.md │ │ ├── factory.rs │ │ ├── mod.rs │ │ ├── parsers │ │ │ ├── base.rs │ │ │ ├── deepseek_r1.rs │ │ │ ├── glm45.rs │ │ │ ├── kimi.rs │ │ │ ├── minimax.rs │ │ │ ├── mod.rs │ │ │ ├── qwen3.rs │ │ │ └── step3.rs │ │ └── traits.rs │ ├── routers │ │ ├── conversations │ │ │ ├── handlers.rs │ │ │ └── mod.rs │ │ ├── factory.rs │ │ ├── grpc │ │ │ ├── client.rs │ │ │ ├── common │ │ │ │ ├── mod.rs │ │ │ │ ├── response_collection.rs │ │ │ │ ├── response_formatting.rs │ │ │ │ ├── responses │ │ │ │ │ ├── handlers.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── streaming.rs │ │ │ │ │ └── utils.rs │ │ │ │ └── stages │ │ │ │ │ ├── client_acquisition.rs │ │ │ │ │ ├── dispatch_metadata.rs │ │ │ │ │ ├── helpers.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── request_execution.rs │ │ │ │ │ └── worker_selection.rs │ │ │ ├── context.rs │ │ │ ├── error.rs │ │ │ ├── harmony │ │ │ │ ├── builder.rs │ │ │ │ ├── detector.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── parser.rs │ │ │ │ ├── processor.rs │ │ │ │ ├── responses.rs │ │ │ │ ├── stages │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── preparation.rs │ │ │ │ │ ├── request_building.rs │ │ │ │ │ └── response_processing.rs │ │ │ │ ├── streaming.rs │ │ │ │ └── types.rs │ │ │ ├── mod.rs │ │ │ ├── pd_router.rs │ │ │ ├── pipeline.rs │ │ │ ├── proto_wrapper.rs │ │ │ ├── regular │ │ │ │ ├── mod.rs │ │ │ │ ├── processor.rs │ │ │ │ ├── responses │ │ │ │ │ ├── context.rs │ │ │ │ │ ├── conversions.rs │ │ │ │ │ ├── handlers.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── tool_loop.rs │ │ │ │ │ └── types.rs │ │ │ │ ├── stages │ │ │ │ │ ├── chat │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ ├── preparation.rs │ │ │ │ │ │ ├── request_building.rs │ │ │ │ │ │ └── response_processing.rs │ │ │ │ │ ├── generate │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ ├── preparation.rs │ │ │ │ │ │ ├── request_building.rs │ │ │ │ │ │ └── response_processing.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── preparation.rs │ │ │ │ │ ├── request_building.rs │ │ │ │ │ └── response_processing.rs │ │ │ │ └── streaming.rs │ │ │ ├── router.rs │ │ │ └── utils.rs │ │ ├── header_utils.rs │ │ ├── http │ │ │ ├── mod.rs │ │ │ ├── pd_router.rs │ │ │ ├── pd_types.rs │ │ │ └── router.rs │ │ ├── mod.rs │ │ ├── openai │ │ │ ├── accumulator.rs │ │ │ ├── context.rs │ │ │ ├── conversations.rs │ │ │ ├── mcp.rs │ │ │ ├── mod.rs │ │ │ ├── provider.rs │ │ │ ├── responses.rs │ │ │ ├── router.rs │ │ │ ├── streaming.rs │ │ │ └── tool_handler.rs │ │ └── router_manager.rs │ ├── server.rs │ ├── service_discovery.rs │ ├── tokenizer │ │ ├── README.md │ │ ├── cache │ │ │ ├── fingerprint.rs │ │ │ ├── l0.rs │ │ │ ├── l1.rs │ │ │ └── mod.rs │ │ ├── chat_template.rs │ │ ├── factory.rs │ │ ├── hub.rs │ │ ├── huggingface.rs │ │ ├── mock.rs │ │ ├── mod.rs │ │ ├── sequence.rs │ │ ├── stop.rs │ │ ├── stream.rs │ │ ├── tests.rs │ │ ├── tiktoken.rs │ │ └── traits.rs │ ├── tool_parser │ │ ├── errors.rs │ │ ├── factory.rs │ │ ├── mod.rs │ │ ├── parsers │ │ │ ├── deepseek.rs │ │ │ ├── glm4_moe.rs │ │ │ ├── helpers.rs │ │ │ ├── json.rs │ │ │ ├── kimik2.rs │ │ │ ├── llama.rs │ │ │ ├── minimax_m2.rs │ │ │ ├── mistral.rs │ │ │ ├── mod.rs │ │ │ ├── passthrough.rs │ │ │ ├── pythonic.rs │ │ │ ├── qwen.rs │ │ │ └── step3.rs │ │ ├── partial_json.rs │ │ ├── state.rs │ │ ├── tests.rs │ │ ├── traits.rs │ │ └── types.rs │ ├── version.rs │ └── wasm │ │ ├── README.md │ │ ├── config.rs │ │ ├── errors.rs │ │ ├── interface │ │ └── spec.wit │ │ ├── mod.rs │ │ ├── module.rs │ │ ├── module_manager.rs │ │ ├── route.rs │ │ ├── runtime.rs │ │ ├── spec.rs │ │ └── types.rs └── tests │ ├── api_endpoints_test.rs │ ├── cache_aware_backward_compat_test.rs │ ├── chat_template_format_detection.rs │ ├── chat_template_integration.rs │ ├── chat_template_loading.rs │ ├── common │ ├── mock_mcp_server.rs │ ├── mock_openai_server.rs │ ├── mock_worker.rs │ ├── mod.rs │ ├── streaming_helpers.rs │ └── test_app.rs │ ├── fixtures │ └── images │ │ ├── grayscale.jpg │ │ ├── large.jpg │ │ ├── odd_dims.jpg │ │ ├── small.jpg │ │ ├── square.jpg │ │ ├── tall.jpg │ │ ├── tiny.jpg │ │ ├── very_tall.jpg │ │ ├── very_wide.jpg │ │ └── wide.jpg │ ├── mcp_test.rs │ ├── metrics_aggregator_test.rs │ ├── multimodal_tracker_test.rs │ ├── otel_tracing_test.rs │ ├── policy_registry_integration.rs │ ├── request_formats_test.rs │ ├── responses_api_test.rs │ ├── spec │ ├── chat_completion.rs │ ├── chat_message.rs │ ├── embedding.rs │ ├── mod.rs │ ├── rerank.rs │ └── responses.rs │ ├── spec_test.rs │ ├── streaming_tests.rs │ ├── test_openai_routing.rs │ ├── test_pd_routing.rs │ ├── tokenizer_cache_correctness_test.rs │ ├── tokenizer_integration.rs │ ├── tool_parser_deepseek.rs │ ├── tool_parser_edge_cases.rs │ ├── tool_parser_fallback.rs │ ├── tool_parser_glm4_moe.rs │ ├── tool_parser_json.rs │ ├── tool_parser_kimik2.rs │ ├── tool_parser_llama.rs │ ├── tool_parser_minimax_m2.rs │ ├── tool_parser_mistral.rs │ ├── tool_parser_mixed_edge_cases.rs │ ├── tool_parser_partial_json.rs │ ├── tool_parser_pythonic.rs │ ├── tool_parser_qwen.rs │ ├── tool_parser_step3.rs │ ├── vision_golden_tests.rs │ ├── wasm_test.rs │ └── workflow_test.rs └── test ├── README.md ├── lora_utils.py ├── manual ├── ascend │ ├── test_ascend_w8a8_quantization.py │ └── test_mindspore_models.py ├── cpu │ └── test_comm.py ├── debug_utils │ └── test_log_parser.py ├── entrypoints │ └── http_server │ │ └── test_abort_request.py ├── ep │ ├── test_deepep_internode.py │ ├── test_deepep_intranode.py │ ├── test_deepep_low_latency.py │ ├── test_eplb.py │ ├── test_hybrid_dp_ep_tp_mtp.py │ ├── test_moe_deepep.py │ ├── test_moe_deepep_eval_accuracy_large.py │ └── test_mooncake_ep_small.py ├── hicache │ └── test_disaggregation_hicache.py ├── kv_transfer │ └── test_mooncake_transfer_engine.py ├── lang_frontend │ ├── test_bind_cache.py │ ├── test_choices.py │ ├── test_jump_forward.py │ ├── test_openai_backend.py │ ├── test_separate_reasoning.py │ └── test_separate_reasoning_execution.py ├── layers │ ├── attention │ │ └── nsa │ │ │ ├── test_act_quant_triton.py │ │ │ └── test_index_buf_accessor.py │ └── moe │ │ └── test_moe_runners.py ├── lora │ ├── test_chunked_sgmv_backend.py │ ├── test_lora_cuda_graph.py │ ├── test_lora_llama4.py │ ├── test_lora_qwen3_vl.py │ └── test_lora_spec_decoding.py ├── models │ ├── test_clip_models.py │ ├── test_falcon_h1_models.py │ ├── test_gme_qwen_models.py │ ├── test_grok_models.py │ ├── test_llama4_models.py │ ├── test_mtp_models.py │ └── test_unsloth_models.py ├── nightly │ ├── test_deepseek_v31_perf.py │ ├── test_deepseek_v32_perf.py │ ├── test_text_models_gsm8k_eval.py │ ├── test_text_models_perf.py │ ├── test_vlms_mmmu_eval.py │ ├── test_vlms_perf.py │ └── test_vlms_piecewise_cuda_graph.py ├── openai_server │ └── features │ │ ├── test_cache_report.py │ │ ├── test_continuous_usage_stats.py │ │ └── test_structural_tag.py ├── piecewise_cudagraph │ └── test_disaggregation_piecewise_cuda_graph.py ├── quant │ └── test_fp8_kvcache.py ├── test_async_dynamic_batch_tokenizer.py ├── test_async_mm_data_processor.py ├── test_config_integration.py ├── test_custom_allreduce.py ├── test_deepseek_chat_templates.py ├── test_deepseek_v32_cp_single_node.py ├── test_double_sparsity.py ├── test_expert_distribution.py ├── test_expert_location_updater.py ├── test_fim_completion.py ├── test_forward_split_prefill.py ├── test_get_weights_by_name.py ├── test_health_check.py ├── test_kv_events.py ├── test_logprobs.py ├── test_lora_ops.py ├── test_mla_tp.py ├── test_modelopt.py ├── test_modelopt_fp8kvcache.py ├── test_models_from_modelscope.py ├── test_mscclpp.py ├── test_quick_allreduce.py ├── test_sagemaker_server.py ├── test_schedule_policy.py ├── test_session_control.py ├── test_srt_engine_with_quant_args.py ├── test_tokenizer_batch_encode.py ├── test_tokenizer_manager.py ├── test_torch_backend.py ├── test_torch_flex_attention_backend.py ├── test_torch_tp.py ├── test_tracing.py ├── test_triton_attention_rocm_mla.py ├── test_triton_moe_wna16.py ├── test_trtllm_fp8_kv_kernel.py ├── test_two_batch_overlap.py ├── test_vertex_endpoint.py ├── test_vlm_accuracy.py ├── test_wave_attention_backend.py └── test_weight_version.py ├── nightly ├── ascend │ ├── embedding_models │ │ └── test_ascend_embedding_models.py │ ├── llm_models │ │ ├── gsm8k_ascend_mixin.py │ │ ├── test_ascend_afm_4_5b.py │ │ ├── test_ascend_baichuan2_13b_chat.py │ │ ├── test_ascend_c4ai_command_r_v01.py │ │ ├── test_ascend_charglm2_6b.py │ │ ├── test_ascend_exaone_3.py │ │ ├── test_ascend_gemma_3_1b_it.py │ │ ├── test_ascend_glm4_9b_chat.py │ │ ├── test_ascend_granite_3_0_3b_a800m.py │ │ ├── test_ascend_granite_3_1_8b.py │ │ ├── test_ascend_internlm2_7b.py │ │ ├── test_ascend_ling_lite.py │ │ ├── test_ascend_llama_2_7b.py │ │ ├── test_ascend_mimo_7b_rl.py │ │ ├── test_ascend_mistral_7b.py │ │ ├── test_ascend_persimmon_8b_chat.py │ │ ├── test_ascend_phi_4_multimodal.py │ │ ├── test_ascend_smollm_1_7b.py │ │ └── tool_chat_template_c4ai_command_r_v01.jinja │ ├── rerank_models │ │ └── test_ascend_cross_encoder_models.py │ └── vlm_models │ │ ├── mmmu-val.yaml │ │ ├── test_ascend_gemma_3_4b_it.py │ │ ├── test_ascend_janus_pro_1b.py │ │ ├── test_ascend_janus_pro_7b.py │ │ ├── test_ascend_mimo_vl_7b_rl.py │ │ ├── test_ascend_minicpm_o_2_6.py │ │ ├── test_ascend_minicpm_v_2_6.py │ │ ├── test_ascend_phi4_multimodal_instruct.py │ │ ├── test_ascend_qwen2_5_vl_3b_instruct.py │ │ └── test_vlm_utils.py ├── nightly_utils.py ├── test_batch_invariant_ops.py ├── test_cpp_radix_cache.py ├── test_deepseek_r1_fp8_trtllm_backend.py ├── test_deepseek_v31_perf.py ├── test_deepseek_v32_nsabackend.py ├── test_deepseek_v32_perf.py ├── test_deepseek_v32_tp.py ├── test_deepseek_v3_deterministic.py ├── test_deepseek_v3_fp4_cutlass_moe.py ├── test_eagle_infer_beta_dp_attention_large.py ├── test_encoder_dp.py ├── test_flashinfer_trtllm_gen_attn_backend.py ├── test_flashinfer_trtllm_gen_moe_backend.py ├── test_fp4_moe.py ├── test_glm_4_6_perf.py ├── test_gpt_oss_4gpu_perf.py ├── test_kimi_k2_thinking_perf.py ├── test_lora_eviction_policy.py ├── test_lora_openai_api.py ├── test_lora_openai_compatible.py ├── test_lora_qwen3.py ├── test_lora_radix_cache.py ├── test_minimax_m2_perf.py ├── test_mistral_large3_perf.py ├── test_nsa_indexer.py ├── test_qwen3_235b_perf.py ├── test_qwen3_fp4_trtllm_gen_moe.py ├── test_qwen3_next_deterministic.py ├── test_text_models_gsm8k_eval.py ├── test_text_models_perf.py ├── test_vlms_mmmu_eval.py └── test_vlms_perf.py ├── pytest.ini ├── registered ├── function_call │ ├── test_function_call_parser.py │ ├── test_json_schema_constraint.py │ └── test_unknown_tool_name.py ├── stress │ ├── test_stress_deepseek_v3.py │ ├── test_stress_glm_4_6.py │ ├── test_stress_kimi_k2.py │ └── test_stress_qwen3_235b.py └── test_srt_backend.py ├── run_suite.py ├── run_suite_nightly.py └── srt ├── ascend ├── test_ascend_deepep.py ├── test_ascend_deepseek_mtp.py ├── test_ascend_graph_tp1_bf16.py ├── test_ascend_graph_tp2_bf16.py ├── test_ascend_hicache_mha.py ├── test_ascend_hicache_mla.py ├── test_ascend_mla_fia_w8a8int8.py ├── test_ascend_mla_w8a8int8.py ├── test_ascend_sampling_backend.py ├── test_ascend_tp1_bf16.py ├── test_ascend_tp2_bf16.py ├── test_ascend_tp2_fia_bf16.py └── test_ascend_tp4_bf16.py ├── configs ├── deepseek_v3.yaml ├── deepseek_v3_long_context.yaml ├── llama_405b.yaml ├── random_config.yaml ├── random_flashinfer_vs_triton_config.yaml └── sharegpt_config.yaml ├── cpu ├── test_activation.py ├── test_binding.py ├── test_causal_conv1d.py ├── test_cpu_graph.py ├── test_decode.py ├── test_extend.py ├── test_gemm.py ├── test_intel_amx_attention_backend_a.py ├── test_intel_amx_attention_backend_b.py ├── test_intel_amx_attention_backend_c.py ├── test_mamba.py ├── test_mla.py ├── test_moe.py ├── test_norm.py ├── test_qkv_proj_with_rope.py ├── test_qwen3.py ├── test_rope.py ├── test_shared_expert.py ├── test_topk.py └── utils.py ├── debug_utils └── test_tensor_dump_forward_hook.py ├── double-sparsity-config-Llama-3.1-8B-Instruct.json ├── ep ├── test_deepep_large.py ├── test_deepep_small.py └── test_moe_ep.py ├── experiment_runner.py ├── external_models └── custom_qwen2_vl.py ├── hicache ├── test_hicache_storage.py ├── test_hicache_storage_3fs_backend.py ├── test_hicache_storage_file_backend.py ├── test_hicache_storage_mooncake_backend.py └── test_hicache_variants.py ├── kv_cache_scales_llama3_1_8b.json ├── kv_cache_scales_llama3_8b.json ├── kv_cache_scales_qwen2_1_5b.json ├── layers └── attention │ └── mamba │ ├── test_causal_conv1d.py │ ├── test_mamba2_mixer.py │ ├── test_mamba_ssm.py │ └── test_mamba_ssm_ssd.py ├── lora ├── test_lora.py ├── test_lora_backend.py ├── test_lora_eviction.py ├── test_lora_tp.py ├── test_lora_update.py └── test_multi_lora_backend.py ├── models ├── compare.py ├── test_compressed_tensors_models.py ├── test_cross_encoder_models.py ├── test_dummy_grok_models.py ├── test_embedding_models.py ├── test_encoder_embedding_models.py ├── test_generation_models.py ├── test_glm4_moe_models.py ├── test_kimi_k2_models.py ├── test_kimi_linear_models.py ├── test_ministral3_models.py ├── test_nvidia_nemotron_nano_v2.py ├── test_nvidia_nemotron_nano_v2_vl.py ├── test_qwen3_next_models.py ├── test_qwen_models.py ├── test_reward_models.py ├── test_transformers_models.py └── test_vlm_models.py ├── nightly └── test_gsm8k_eval_amd.py ├── openai_server ├── __init__.py ├── basic │ ├── __init__.py │ ├── test_openai_embedding.py │ ├── test_openai_server.py │ ├── test_protocol.py │ ├── test_serving_chat.py │ ├── test_serving_completions.py │ └── test_serving_embedding.py ├── features │ ├── __init__.py │ ├── test_enable_thinking.py │ ├── test_json_mode.py │ ├── test_openai_server_ebnf.py │ ├── test_openai_server_hidden_states.py │ └── test_reasoning_content.py ├── function_call │ ├── __init__.py │ ├── test_openai_function_calling.py │ └── test_tool_choice.py └── validation │ ├── __init__.py │ ├── test_large_max_new_tokens.py │ ├── test_matched_stop.py │ ├── test_openai_server_ignore_eos.py │ └── test_request_length_validation.py ├── ops └── test_repeat_interleave.py ├── parse_results.py ├── quant ├── test_autoround.py ├── test_awq.py ├── test_awq_dequant.py ├── test_block_int8.py ├── test_fp8_kernel.py ├── test_fused_rms_fp8_group_quant.py ├── test_int8_kernel.py ├── test_triton_scaled_mm.py ├── test_w4a8_deepseek_v3.py └── test_w8a8_quantization.py ├── rl ├── test_fp32_lm_head.py ├── test_update_weights_from_disk.py ├── test_update_weights_from_distributed.py └── test_update_weights_from_tensor.py ├── rotary_embedding └── test_mrope.py ├── run_suite.py ├── test_abort.py ├── test_bench_one_batch.py ├── test_bench_serving.py ├── test_bench_typebaseddispatcher.py ├── test_bnb.py ├── test_build_eagle_tree.py ├── test_chunked_prefill.py ├── test_constrained_decoding.py ├── test_constrained_decoding_spec_reasoning.py ├── test_create_kvindices.py ├── test_cutedsl_moe.py ├── test_data_parallelism.py ├── test_deepseek_v32_basic.py ├── test_deepseek_v32_mtp.py ├── test_deepseek_v3_basic.py ├── test_deepseek_v3_cutedsl_4gpu.py ├── test_deepseek_v3_fp4_4gpu.py ├── test_deepseek_v3_mtp.py ├── test_deterministic.py ├── test_disaggregation_basic.py ├── test_disaggregation_different_tp.py ├── test_disaggregation_dp_attention.py ├── test_disaggregation_hybrid_attention.py ├── test_disaggregation_pp.py ├── test_dp_attention.py ├── test_eagle_constrained_decoding.py ├── test_eagle_dp_attention.py ├── test_eagle_infer_a.py ├── test_eagle_infer_b.py ├── test_eagle_infer_beta.py ├── test_eagle_infer_beta_dp_attention.py ├── test_eval_accuracy_large.py ├── test_eval_fp8_accuracy.py ├── test_external_models.py ├── test_fa3.py ├── test_flash_attention_4.py ├── test_flashmla.py ├── test_fp8_utils.py ├── test_fused_moe.py ├── test_gguf.py ├── test_gpt_oss_1gpu.py ├── test_gpt_oss_4gpu.py ├── test_gpt_oss_common.py ├── test_gptqmodel_dynamic.py ├── test_harmony_parser.py ├── test_hidden_states.py ├── test_hybrid_attn_backend.py ├── test_input_embeddings.py ├── test_io_struct.py ├── test_jinja_template_utils.py ├── test_llama31_fp4.py ├── test_load_weights_from_remote_instance.py ├── test_local_attn.py ├── test_mamba_unittest.py ├── test_metrics.py ├── test_metrics_utils.py ├── test_mistral_large3_basic.py ├── test_mla.py ├── test_mla_deepseek_v3.py ├── test_mla_flashinfer.py ├── test_mla_fp8.py ├── test_mla_int8_deepseek_v3.py ├── test_model_hooks.py ├── test_modelopt_export.py ├── test_modelopt_loader.py ├── test_moe_eval_accuracy_large.py ├── test_multi_instance_release_memory_occupation.py ├── test_multi_tokenizer.py ├── test_ngram_speculative_decoding.py ├── test_no_chunked_prefill.py ├── test_no_overlap_scheduler.py ├── test_original_logprobs.py ├── test_page_size.py ├── test_patch_torch.py ├── test_penalty.py ├── test_piecewise_cuda_graph.py ├── test_pp_single_node.py ├── test_priority_scheduling.py ├── test_profile_merger.py ├── test_profile_merger_http_api.py ├── test_profile_v2.py ├── test_pytorch_sampling_backend.py ├── test_quantization.py ├── test_radix_attention.py ├── test_radix_cache_unit.py ├── test_reasoning_parser.py ├── test_release_memory_occupation.py ├── test_request_queue_validation.py ├── test_retract_decode.py ├── test_rope_rocm.py ├── test_score_api.py ├── test_server_args.py ├── test_skip_tokenizer_init.py ├── test_speculative_registry.py ├── test_srt_endpoint.py ├── test_srt_engine.py ├── test_standalone_speculative_decoding.py ├── test_start_profile.py ├── test_swa_unittest.py ├── test_torch_compile.py ├── test_torch_compile_moe.py ├── test_torch_native_attention_backend.py ├── test_torchao.py ├── test_triton_attention_backend.py ├── test_triton_attention_kernels.py ├── test_triton_fused_moe.py ├── test_triton_moe_channel_fp8_kernel.py ├── test_triton_sliding_window.py ├── test_type_based_dispatcher.py ├── test_utils_update_weights.py ├── test_video_utils.py ├── test_vision_chunked_prefill.py ├── test_vision_openai_server_a.py ├── test_vision_openai_server_common.py ├── test_vlm_input_format.py ├── test_wave_attention_kernels.py └── xpu └── test_intel_xpu_backend.py /.clang-format-ignore: -------------------------------------------------------------------------------- 1 | sgl-kernel/3rdparty/tensorrt_llm/* 2 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.devcontainer/Dockerfile -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.devcontainer/devcontainer.json -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.editorconfig -------------------------------------------------------------------------------- /.github/CI_PERMISSIONS.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/CI_PERMISSIONS.json -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/FOLDER_README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/FOLDER_README.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1-bug-report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/ISSUE_TEMPLATE/1-bug-report.yml -------------------------------------------------------------------------------- /.github/MAINTAINER.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/MAINTAINER.md -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/labeler.yml -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/update_ci_permission.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/update_ci_permission.py -------------------------------------------------------------------------------- /.github/workflows/auto-format.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/auto-format.yml -------------------------------------------------------------------------------- /.github/workflows/auto-tune.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/auto-tune.yml -------------------------------------------------------------------------------- /.github/workflows/ci-monitor.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/ci-monitor.yml -------------------------------------------------------------------------------- /.github/workflows/execute-notebook.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/execute-notebook.yml -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/labeler.yml -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/lint.yml -------------------------------------------------------------------------------- /.github/workflows/nightly-test-amd.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/nightly-test-amd.yml -------------------------------------------------------------------------------- /.github/workflows/nightly-test-npu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/nightly-test-npu.yml -------------------------------------------------------------------------------- /.github/workflows/pr-benchmark-rust.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-benchmark-rust.yml -------------------------------------------------------------------------------- /.github/workflows/pr-gate.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-gate.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test-amd.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test-amd.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test-npu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test-npu.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test-pd-router.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test-pd-router.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test-rust.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test-rust.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test-xeon.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test-xeon.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test-xpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test-xpu.yml -------------------------------------------------------------------------------- /.github/workflows/pr-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/pr-test.yml -------------------------------------------------------------------------------- /.github/workflows/release-docker.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/release-docker.yml -------------------------------------------------------------------------------- /.github/workflows/release-docs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/release-docs.yml -------------------------------------------------------------------------------- /.github/workflows/release-fake-tag.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/release-fake-tag.yml -------------------------------------------------------------------------------- /.github/workflows/release-pypi.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/release-pypi.yml -------------------------------------------------------------------------------- /.github/workflows/stress-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.github/workflows/stress-test.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.gitignore -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile=black 3 | known_first_party=sglang 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /3rdparty/amd/profiling/PROFILING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/profiling/PROFILING.md -------------------------------------------------------------------------------- /3rdparty/amd/profiling/client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/profiling/client.sh -------------------------------------------------------------------------------- /3rdparty/amd/profiling/install_rpd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/profiling/install_rpd.sh -------------------------------------------------------------------------------- /3rdparty/amd/profiling/loadTracer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/profiling/loadTracer.sh -------------------------------------------------------------------------------- /3rdparty/amd/profiling/rpd.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/profiling/rpd.patch -------------------------------------------------------------------------------- /3rdparty/amd/profiling/server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/profiling/server.sh -------------------------------------------------------------------------------- /3rdparty/amd/tuning/TUNING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/3rdparty/amd/tuning/TUNING.md -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/README.md -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/assets/logo.svg -------------------------------------------------------------------------------- /assets/logo_square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/assets/logo_square.png -------------------------------------------------------------------------------- /assets/logo_square.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/assets/logo_square.svg -------------------------------------------------------------------------------- /benchmark/benchmark_vllm_060/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/benchmark_vllm_060/README.md -------------------------------------------------------------------------------- /benchmark/blog_v0_2/405b_sglang.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/blog_v0_2/405b_sglang.sh -------------------------------------------------------------------------------- /benchmark/blog_v0_2/405b_trt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/blog_v0_2/405b_trt.sh -------------------------------------------------------------------------------- /benchmark/blog_v0_2/405b_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/blog_v0_2/405b_vllm.sh -------------------------------------------------------------------------------- /benchmark/blog_v0_2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/blog_v0_2/README.md -------------------------------------------------------------------------------- /benchmark/blog_v0_2/config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/blog_v0_2/config.md -------------------------------------------------------------------------------- /benchmark/boolq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/boolq/README.md -------------------------------------------------------------------------------- /benchmark/boolq/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/boolq/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/boolq/parquet_to_json.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/boolq/parquet_to_json.sh -------------------------------------------------------------------------------- /benchmark/ceval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/ceval/README.md -------------------------------------------------------------------------------- /benchmark/ceval/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/ceval/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/deepseek_v3/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/deepseek_v3/README.md -------------------------------------------------------------------------------- /benchmark/dspy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/dspy/README.md -------------------------------------------------------------------------------- /benchmark/dspy/bench_dspy_intro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/dspy/bench_dspy_intro.py -------------------------------------------------------------------------------- /benchmark/generative_agents/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/generative_agents/README.md -------------------------------------------------------------------------------- /benchmark/gpt_oss/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/gpt_oss/README.md -------------------------------------------------------------------------------- /benchmark/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/gsm8k/README.md -------------------------------------------------------------------------------- /benchmark/gsm8k/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/gsm8k/bench_other.py -------------------------------------------------------------------------------- /benchmark/gsm8k/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/gsm8k/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/hellaswag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hellaswag/README.md -------------------------------------------------------------------------------- /benchmark/hellaswag/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hellaswag/bench_other.py -------------------------------------------------------------------------------- /benchmark/hellaswag/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hellaswag/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/hf3fs/bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hf3fs/bench.sh -------------------------------------------------------------------------------- /benchmark/hf3fs/bench_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hf3fs/bench_client.py -------------------------------------------------------------------------------- /benchmark/hf3fs/bench_storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hf3fs/bench_storage.py -------------------------------------------------------------------------------- /benchmark/hf3fs/bench_zerocopy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hf3fs/bench_zerocopy.py -------------------------------------------------------------------------------- /benchmark/hicache/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/README.md -------------------------------------------------------------------------------- /benchmark/hicache/bench_long_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/bench_long_context.py -------------------------------------------------------------------------------- /benchmark/hicache/bench_mix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/bench_mix.py -------------------------------------------------------------------------------- /benchmark/hicache/bench_mix.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/bench_mix.sh -------------------------------------------------------------------------------- /benchmark/hicache/bench_multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/bench_multiturn.py -------------------------------------------------------------------------------- /benchmark/hicache/bench_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/bench_serving.py -------------------------------------------------------------------------------- /benchmark/hicache/data_processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/data_processing.py -------------------------------------------------------------------------------- /benchmark/hicache/download.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/download.sh -------------------------------------------------------------------------------- /benchmark/hicache/nextqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/nextqa.py -------------------------------------------------------------------------------- /benchmark/hicache/perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/hicache/perf.py -------------------------------------------------------------------------------- /benchmark/json_decode_regex/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/json_decode_regex/README.md -------------------------------------------------------------------------------- /benchmark/json_jump_forward/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/json_jump_forward/README.md -------------------------------------------------------------------------------- /benchmark/json_jump_forward/dataset.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/json_jump_forward/dataset.txt -------------------------------------------------------------------------------- /benchmark/json_schema/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/json_schema/README.md -------------------------------------------------------------------------------- /benchmark/json_schema/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/json_schema/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/kernels/deepseek/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/kernels/deepseek/README.md -------------------------------------------------------------------------------- /benchmark/line_retrieval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/line_retrieval/README.md -------------------------------------------------------------------------------- /benchmark/line_retrieval/gen_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/line_retrieval/gen_data.py -------------------------------------------------------------------------------- /benchmark/llava_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llava_bench/README.md -------------------------------------------------------------------------------- /benchmark/llava_bench/bench_hf_mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llava_bench/bench_hf_mme.sh -------------------------------------------------------------------------------- /benchmark/llava_bench/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llava_bench/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/llava_bench/questions.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llava_bench/questions.jsonl -------------------------------------------------------------------------------- /benchmark/llm_judge/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llm_judge/README.md -------------------------------------------------------------------------------- /benchmark/llm_judge/articles.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llm_judge/articles.jsonl -------------------------------------------------------------------------------- /benchmark/llm_judge/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llm_judge/bench_other.py -------------------------------------------------------------------------------- /benchmark/llm_judge/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/llm_judge/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/long_json_decode/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/long_json_decode/README.md -------------------------------------------------------------------------------- /benchmark/lora/launch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/lora/launch_server.py -------------------------------------------------------------------------------- /benchmark/lora/lora_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/lora/lora_bench.py -------------------------------------------------------------------------------- /benchmark/mmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmlu/README.md -------------------------------------------------------------------------------- /benchmark/mmlu/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmlu/bench_other.py -------------------------------------------------------------------------------- /benchmark/mmlu/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmlu/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/mmlu/download_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmlu/download_data.sh -------------------------------------------------------------------------------- /benchmark/mmmu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmmu/README.md -------------------------------------------------------------------------------- /benchmark/mmmu/bench_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmmu/bench_hf.py -------------------------------------------------------------------------------- /benchmark/mmmu/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmmu/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/mmmu/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmmu/data_utils.py -------------------------------------------------------------------------------- /benchmark/mmmu/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmmu/eval_utils.py -------------------------------------------------------------------------------- /benchmark/mmmu/prompt_format.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mmmu/prompt_format.yaml -------------------------------------------------------------------------------- /benchmark/mtbench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mtbench/README.md -------------------------------------------------------------------------------- /benchmark/mtbench/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mtbench/bench_other.py -------------------------------------------------------------------------------- /benchmark/mtbench/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mtbench/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/mtbench/bench_sglang_eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/mtbench/bench_sglang_eagle.py -------------------------------------------------------------------------------- /benchmark/multi_document_qa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/multi_document_qa/README.md -------------------------------------------------------------------------------- /benchmark/multi_turn_chat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/multi_turn_chat/README.md -------------------------------------------------------------------------------- /benchmark/multi_turn_chat/data_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/multi_turn_chat/data_gen.py -------------------------------------------------------------------------------- /benchmark/prefill_only/bench_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/prefill_only/bench_score.py -------------------------------------------------------------------------------- /benchmark/prefill_only/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/prefill_only/util.py -------------------------------------------------------------------------------- /benchmark/react/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/react/README.md -------------------------------------------------------------------------------- /benchmark/react/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/react/bench_other.py -------------------------------------------------------------------------------- /benchmark/react/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/react/bench_sglang.py -------------------------------------------------------------------------------- /benchmark/react/hotpotqa_100.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/react/hotpotqa_100.jsonl -------------------------------------------------------------------------------- /benchmark/reasoning_benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/reasoning_benchmark/README.md -------------------------------------------------------------------------------- /benchmark/tip_suggestion/.gitignore: -------------------------------------------------------------------------------- 1 | !topic.jsonl 2 | -------------------------------------------------------------------------------- /benchmark/tip_suggestion/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/tip_suggestion/README.md -------------------------------------------------------------------------------- /benchmark/tip_suggestion/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/tip_suggestion/bench_other.py -------------------------------------------------------------------------------- /benchmark/tip_suggestion/lmql_funcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/tip_suggestion/lmql_funcs.py -------------------------------------------------------------------------------- /benchmark/tip_suggestion/topic.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/tip_suggestion/topic.jsonl -------------------------------------------------------------------------------- /benchmark/tree_of_thought_v0/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/benchmark/tree_of_thought_v0/README.md -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/compose.yaml -------------------------------------------------------------------------------- /docker/configs/.gitconfig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/configs/.gitconfig -------------------------------------------------------------------------------- /docker/configs/.tmux.conf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/configs/.tmux.conf -------------------------------------------------------------------------------- /docker/configs/.vimrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/configs/.vimrc -------------------------------------------------------------------------------- /docker/configs/.zshrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/configs/.zshrc -------------------------------------------------------------------------------- /docker/configs/yank: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/configs/yank -------------------------------------------------------------------------------- /docker/diffusion.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/diffusion.Dockerfile -------------------------------------------------------------------------------- /docker/gateway.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/gateway.Dockerfile -------------------------------------------------------------------------------- /docker/k8s-sglang-distributed-sts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/k8s-sglang-distributed-sts.yaml -------------------------------------------------------------------------------- /docker/k8s-sglang-service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/k8s-sglang-service.yaml -------------------------------------------------------------------------------- /docker/npu.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/npu.Dockerfile -------------------------------------------------------------------------------- /docker/rocm.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/rocm.Dockerfile -------------------------------------------------------------------------------- /docker/sagemaker.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/sagemaker.Dockerfile -------------------------------------------------------------------------------- /docker/serve: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/serve -------------------------------------------------------------------------------- /docker/xeon.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/xeon.Dockerfile -------------------------------------------------------------------------------- /docker/xpu.Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docker/xpu.Dockerfile -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/_static/css/custom_log.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/_static/css/custom_log.css -------------------------------------------------------------------------------- /docs/_static/css/readthedocs.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/_static/css/readthedocs.css -------------------------------------------------------------------------------- /docs/_static/image/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/_static/image/logo.ico -------------------------------------------------------------------------------- /docs/_static/image/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/_static/image/logo.png -------------------------------------------------------------------------------- /docs/advanced_features/forward_hooks.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/forward_hooks.md -------------------------------------------------------------------------------- /docs/advanced_features/hicache.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/hicache.rst -------------------------------------------------------------------------------- /docs/advanced_features/lora.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/lora.ipynb -------------------------------------------------------------------------------- /docs/advanced_features/observability.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/observability.md -------------------------------------------------------------------------------- /docs/advanced_features/quantization.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/quantization.md -------------------------------------------------------------------------------- /docs/advanced_features/router.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/router.md -------------------------------------------------------------------------------- /docs/advanced_features/vlm_query.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/advanced_features/vlm_query.ipynb -------------------------------------------------------------------------------- /docs/basic_usage/deepseek_v3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/deepseek_v3.md -------------------------------------------------------------------------------- /docs/basic_usage/deepseek_v32.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/deepseek_v32.md -------------------------------------------------------------------------------- /docs/basic_usage/gpt_oss.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/gpt_oss.md -------------------------------------------------------------------------------- /docs/basic_usage/llama4.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/llama4.md -------------------------------------------------------------------------------- /docs/basic_usage/native_api.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/native_api.ipynb -------------------------------------------------------------------------------- /docs/basic_usage/openai_api.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/openai_api.rst -------------------------------------------------------------------------------- /docs/basic_usage/qwen3.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/qwen3.md -------------------------------------------------------------------------------- /docs/basic_usage/qwen3_vl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/qwen3_vl.md -------------------------------------------------------------------------------- /docs/basic_usage/sampling_params.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/sampling_params.md -------------------------------------------------------------------------------- /docs/basic_usage/send_request.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/basic_usage/send_request.ipynb -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/deploy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/deploy.py -------------------------------------------------------------------------------- /docs/developer_guide/bench_serving.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/developer_guide/bench_serving.md -------------------------------------------------------------------------------- /docs/developer_guide/release_process.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/developer_guide/release_process.md -------------------------------------------------------------------------------- /docs/get_started/install.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/get_started/install.md -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/platforms/amd_gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/amd_gpu.md -------------------------------------------------------------------------------- /docs/platforms/ascend_npu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/ascend_npu.md -------------------------------------------------------------------------------- /docs/platforms/ascend_npu_support.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/ascend_npu_support.rst -------------------------------------------------------------------------------- /docs/platforms/cpu_server.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/cpu_server.md -------------------------------------------------------------------------------- /docs/platforms/nvidia_jetson.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/nvidia_jetson.md -------------------------------------------------------------------------------- /docs/platforms/tpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/tpu.md -------------------------------------------------------------------------------- /docs/platforms/xpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/platforms/xpu.md -------------------------------------------------------------------------------- /docs/references/custom_chat_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/references/custom_chat_template.md -------------------------------------------------------------------------------- /docs/references/faq.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/references/faq.md -------------------------------------------------------------------------------- /docs/references/learn_more.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/references/learn_more.md -------------------------------------------------------------------------------- /docs/references/mindspore_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/references/mindspore_models.md -------------------------------------------------------------------------------- /docs/references/production_metrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/references/production_metrics.md -------------------------------------------------------------------------------- /docs/references/torch_compile_cache.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/references/torch_compile_cache.md -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/serve.sh -------------------------------------------------------------------------------- /docs/supported_models/modelscope.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/supported_models/modelscope.md -------------------------------------------------------------------------------- /docs/supported_models/rerank_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/supported_models/rerank_models.md -------------------------------------------------------------------------------- /docs/supported_models/reward_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/supported_models/reward_models.md -------------------------------------------------------------------------------- /docs/wrap_run_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/docs/wrap_run_llm.py -------------------------------------------------------------------------------- /examples/assets/.gitignore: -------------------------------------------------------------------------------- 1 | !example_image.png 2 | -------------------------------------------------------------------------------- /examples/assets/example_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/assets/example_image.png -------------------------------------------------------------------------------- /examples/checkpoint_engine/update.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/checkpoint_engine/update.py -------------------------------------------------------------------------------- /examples/monitoring/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/monitoring/README.md -------------------------------------------------------------------------------- /examples/monitoring/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/monitoring/docker-compose.yaml -------------------------------------------------------------------------------- /examples/monitoring/opentelemetry.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/monitoring/opentelemetry.yaml -------------------------------------------------------------------------------- /examples/monitoring/prometheus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/monitoring/prometheus.yaml -------------------------------------------------------------------------------- /examples/runtime/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/runtime/README.md -------------------------------------------------------------------------------- /examples/runtime/engine/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/runtime/engine/embedding.py -------------------------------------------------------------------------------- /examples/runtime/engine/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/runtime/engine/readme.md -------------------------------------------------------------------------------- /examples/runtime/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/runtime/lora.py -------------------------------------------------------------------------------- /examples/runtime/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/runtime/reward_model.py -------------------------------------------------------------------------------- /examples/runtime/vertex_predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/examples/runtime/vertex_predict.py -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/pyproject.toml -------------------------------------------------------------------------------- /python/pyproject_cpu.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/pyproject_cpu.toml -------------------------------------------------------------------------------- /python/pyproject_other.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/pyproject_other.toml -------------------------------------------------------------------------------- /python/pyproject_xpu.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/pyproject_xpu.toml -------------------------------------------------------------------------------- /python/sglang/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/README.md -------------------------------------------------------------------------------- /python/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/__init__.py -------------------------------------------------------------------------------- /python/sglang/bench_one_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/bench_one_batch.py -------------------------------------------------------------------------------- /python/sglang/bench_one_batch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/bench_one_batch_server.py -------------------------------------------------------------------------------- /python/sglang/bench_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/bench_serving.py -------------------------------------------------------------------------------- /python/sglang/check_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/check_env.py -------------------------------------------------------------------------------- /python/sglang/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/cli/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/cli/generate.py -------------------------------------------------------------------------------- /python/sglang/cli/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/cli/main.py -------------------------------------------------------------------------------- /python/sglang/cli/serve.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/cli/serve.py -------------------------------------------------------------------------------- /python/sglang/cli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/cli/utils.py -------------------------------------------------------------------------------- /python/sglang/compile_deep_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/compile_deep_gemm.py -------------------------------------------------------------------------------- /python/sglang/eval/llama3_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/eval/llama3_eval.py -------------------------------------------------------------------------------- /python/sglang/eval/loogle_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/eval/loogle_eval.py -------------------------------------------------------------------------------- /python/sglang/global_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/global_config.py -------------------------------------------------------------------------------- /python/sglang/jit_kernel/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/jit_kernel/.clang-format -------------------------------------------------------------------------------- /python/sglang/jit_kernel/hicache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/jit_kernel/hicache.py -------------------------------------------------------------------------------- /python/sglang/jit_kernel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/jit_kernel/utils.py -------------------------------------------------------------------------------- /python/sglang/lang/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/api.py -------------------------------------------------------------------------------- /python/sglang/lang/backend/anthropic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/backend/anthropic.py -------------------------------------------------------------------------------- /python/sglang/lang/backend/litellm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/backend/litellm.py -------------------------------------------------------------------------------- /python/sglang/lang/backend/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/backend/openai.py -------------------------------------------------------------------------------- /python/sglang/lang/backend/vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/backend/vertexai.py -------------------------------------------------------------------------------- /python/sglang/lang/chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/chat_template.py -------------------------------------------------------------------------------- /python/sglang/lang/choices.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/choices.py -------------------------------------------------------------------------------- /python/sglang/lang/interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/interpreter.py -------------------------------------------------------------------------------- /python/sglang/lang/ir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/ir.py -------------------------------------------------------------------------------- /python/sglang/lang/tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/lang/tracer.py -------------------------------------------------------------------------------- /python/sglang/launch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/launch_server.py -------------------------------------------------------------------------------- /python/sglang/multimodal_gen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/multimodal_gen/README.md -------------------------------------------------------------------------------- /python/sglang/multimodal_gen/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/multimodal_gen/envs.py -------------------------------------------------------------------------------- /python/sglang/multimodal_gen/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/multimodal_gen/utils.py -------------------------------------------------------------------------------- /python/sglang/profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/profiler.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/__init__.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/chatglm.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/dbrx.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/dots_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/dots_ocr.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/dots_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/dots_vlm.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/exaone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/exaone.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/falcon_h1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/falcon_h1.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/internvl.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/janus_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/janus_pro.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/jet_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/jet_vlm.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/kimi_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/nemotron_h.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/nemotron_h.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/olmo3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/olmo3.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/qwen3_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/qwen3_next.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/qwen3_omni.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/qwen3_omni.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/qwen3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/qwen3_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/radio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/radio.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/step3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/step3_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/configs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/configs/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/connector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/connector/__init__.py -------------------------------------------------------------------------------- /python/sglang/srt/connector/redis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/connector/redis.py -------------------------------------------------------------------------------- /python/sglang/srt/connector/s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/connector/s3.py -------------------------------------------------------------------------------- /python/sglang/srt/connector/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/connector/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/constants.py -------------------------------------------------------------------------------- /python/sglang/srt/constrained/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/constrained/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/custom_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/custom_op.py -------------------------------------------------------------------------------- /python/sglang/srt/debug_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/srt/debug_utils/dumper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/debug_utils/dumper.py -------------------------------------------------------------------------------- /python/sglang/srt/distributed/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/distributed/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/dllm/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/dllm/config.py -------------------------------------------------------------------------------- /python/sglang/srt/entrypoints/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/entrypoints/engine.py -------------------------------------------------------------------------------- /python/sglang/srt/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/srt/entrypoints/tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/entrypoints/tool.py -------------------------------------------------------------------------------- /python/sglang/srt/entrypoints/warmup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/entrypoints/warmup.py -------------------------------------------------------------------------------- /python/sglang/srt/environ.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/environ.py -------------------------------------------------------------------------------- /python/sglang/srt/eplb/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/srt/eplb/eplb_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/eplb/eplb_manager.py -------------------------------------------------------------------------------- /python/sglang/srt/eplb/eplb_simulator/__init__.py: -------------------------------------------------------------------------------- 1 | from . import reader 2 | -------------------------------------------------------------------------------- /python/sglang/srt/grpc/__init__.py: -------------------------------------------------------------------------------- 1 | # SGLang gRPC module 2 | -------------------------------------------------------------------------------- /python/sglang/srt/grpc/compile_proto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/grpc/compile_proto.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/activation.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/amx_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/amx_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/deep_gemm_wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | from .entrypoint import * 2 | -------------------------------------------------------------------------------- /python/sglang/srt/layers/elementwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/elementwise.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/layernorm.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/linear.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/moe/ep_moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/srt/layers/moe/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/moe/router.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/moe/topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/moe/topk.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/moe/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/moe/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/multimodal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/multimodal.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/parameter.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/pooler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/pooler.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/quantization/quark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/srt/layers/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/sampler.py -------------------------------------------------------------------------------- /python/sglang/srt/layers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/layers/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/layers.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/lora.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/lora_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/lora_config.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/lora_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/lora_manager.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/lora_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/lora_registry.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/mem_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/mem_pool.py -------------------------------------------------------------------------------- /python/sglang/srt/lora/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/lora/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/io_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/managers/io_struct.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/managers/mm_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/managers/scheduler.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/tp_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/managers/tp_worker.py -------------------------------------------------------------------------------- /python/sglang/srt/managers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/managers/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/mem_cache/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/mem_cache/common.py -------------------------------------------------------------------------------- /python/sglang/srt/mem_cache/cpp_radix_tree/.clang-format: -------------------------------------------------------------------------------- 1 | ../../../../../sgl-kernel/.clang-format -------------------------------------------------------------------------------- /python/sglang/srt/mem_cache/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/mem_cache/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/metrics/collector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/metrics/collector.py -------------------------------------------------------------------------------- /python/sglang/srt/metrics/func_timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/metrics/func_timer.py -------------------------------------------------------------------------------- /python/sglang/srt/metrics/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/metrics/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/model_loader/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/model_loader/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/models/apertus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/apertus.py -------------------------------------------------------------------------------- /python/sglang/srt/models/arcee.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/arcee.py -------------------------------------------------------------------------------- /python/sglang/srt/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/baichuan.py -------------------------------------------------------------------------------- /python/sglang/srt/models/bailing_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/bailing_moe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/bert.py -------------------------------------------------------------------------------- /python/sglang/srt/models/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/chatglm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/clip.py -------------------------------------------------------------------------------- /python/sglang/srt/models/commandr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/commandr.py -------------------------------------------------------------------------------- /python/sglang/srt/models/dbrx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/dbrx.py -------------------------------------------------------------------------------- /python/sglang/srt/models/deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/deepseek.py -------------------------------------------------------------------------------- /python/sglang/srt/models/deepseek_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/deepseek_v2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/dots_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/dots_ocr.py -------------------------------------------------------------------------------- /python/sglang/srt/models/dots_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/dots_vlm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/ernie4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/ernie4.py -------------------------------------------------------------------------------- /python/sglang/srt/models/exaone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/exaone.py -------------------------------------------------------------------------------- /python/sglang/srt/models/falcon_h1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/falcon_h1.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gemma.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gemma2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gemma2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gemma3_mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gemma3_mm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gemma3n_mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gemma3n_mm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/glm4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/glm4.py -------------------------------------------------------------------------------- /python/sglang/srt/models/glm4_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/glm4_moe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/glm4v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/glm4v.py -------------------------------------------------------------------------------- /python/sglang/srt/models/glm4v_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/glm4v_moe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gpt2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gpt_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gpt_bigcode.py -------------------------------------------------------------------------------- /python/sglang/srt/models/gpt_oss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/gpt_oss.py -------------------------------------------------------------------------------- /python/sglang/srt/models/granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/granite.py -------------------------------------------------------------------------------- /python/sglang/srt/models/granitemoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/granitemoe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/grok.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/grok.py -------------------------------------------------------------------------------- /python/sglang/srt/models/hunyuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/hunyuan.py -------------------------------------------------------------------------------- /python/sglang/srt/models/idefics2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/idefics2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/internlm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/internlm2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/interns1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/interns1.py -------------------------------------------------------------------------------- /python/sglang/srt/models/internvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/internvl.py -------------------------------------------------------------------------------- /python/sglang/srt/models/jet_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/jet_vlm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/kimi_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/kimi_linear.py -------------------------------------------------------------------------------- /python/sglang/srt/models/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/kimi_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llada2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/llada2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/llama.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llama4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/llama4.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llama_eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/llama_eagle.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/llava.py -------------------------------------------------------------------------------- /python/sglang/srt/models/llavavid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/llavavid.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mimo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mimo.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mimo_mtp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mimo_mtp.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mindspore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mindspore.py -------------------------------------------------------------------------------- /python/sglang/srt/models/minicpm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/minicpm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/minicpm3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/minicpm3.py -------------------------------------------------------------------------------- /python/sglang/srt/models/minicpmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/minicpmo.py -------------------------------------------------------------------------------- /python/sglang/srt/models/minicpmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/minicpmv.py -------------------------------------------------------------------------------- /python/sglang/srt/models/minimax_m2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/minimax_m2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/ministral3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/ministral3.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mistral.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mixtral.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mllama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mllama.py -------------------------------------------------------------------------------- /python/sglang/srt/models/mllama4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/mllama4.py -------------------------------------------------------------------------------- /python/sglang/srt/models/nemotron_h.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/nemotron_h.py -------------------------------------------------------------------------------- /python/sglang/srt/models/nvila.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/nvila.py -------------------------------------------------------------------------------- /python/sglang/srt/models/nvila_lite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/nvila_lite.py -------------------------------------------------------------------------------- /python/sglang/srt/models/olmo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/olmo.py -------------------------------------------------------------------------------- /python/sglang/srt/models/olmo2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/olmo2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/olmoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/olmoe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/opt.py -------------------------------------------------------------------------------- /python/sglang/srt/models/orion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/orion.py -------------------------------------------------------------------------------- /python/sglang/srt/models/persimmon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/persimmon.py -------------------------------------------------------------------------------- /python/sglang/srt/models/phi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/phi.py -------------------------------------------------------------------------------- /python/sglang/srt/models/phi3_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/phi3_small.py -------------------------------------------------------------------------------- /python/sglang/srt/models/phi4mm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/phi4mm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/phimoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/phimoe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/pixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/pixtral.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2_5_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2_audio.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2_eagle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2_eagle.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2_moe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2_rm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen2_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen3.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen3_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen3_moe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen3_next.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen3_next.py -------------------------------------------------------------------------------- /python/sglang/srt/models/qwen3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/qwen3_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/models/radio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/radio.py -------------------------------------------------------------------------------- /python/sglang/srt/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/registry.py -------------------------------------------------------------------------------- /python/sglang/srt/models/roberta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/roberta.py -------------------------------------------------------------------------------- /python/sglang/srt/models/siglip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/siglip.py -------------------------------------------------------------------------------- /python/sglang/srt/models/solar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/solar.py -------------------------------------------------------------------------------- /python/sglang/srt/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/stablelm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/starcoder2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/starcoder2.py -------------------------------------------------------------------------------- /python/sglang/srt/models/step3_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/step3_vl.py -------------------------------------------------------------------------------- /python/sglang/srt/models/teleflm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/teleflm.py -------------------------------------------------------------------------------- /python/sglang/srt/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/utils.py -------------------------------------------------------------------------------- /python/sglang/srt/models/xverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/xverse.py -------------------------------------------------------------------------------- /python/sglang/srt/models/xverse_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/xverse_moe.py -------------------------------------------------------------------------------- /python/sglang/srt/models/yivl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/models/yivl.py -------------------------------------------------------------------------------- /python/sglang/srt/server_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/server_args.py -------------------------------------------------------------------------------- /python/sglang/srt/tracing/trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/tracing/trace.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/__init__.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/aio_rwlock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/aio_rwlock.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/bench_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/bench_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/common.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/numa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/numa_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/offloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/offloader.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/patch_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/patch_torch.py -------------------------------------------------------------------------------- /python/sglang/srt/utils/rpd_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/utils/rpd_utils.py -------------------------------------------------------------------------------- /python/sglang/srt/weight_sync/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/srt/weight_sync/utils.py -------------------------------------------------------------------------------- /python/sglang/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/test/attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/sglang/test/ci/ci_register.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/ci/ci_register.py -------------------------------------------------------------------------------- /python/sglang/test/ci/ci_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/ci/ci_utils.py -------------------------------------------------------------------------------- /python/sglang/test/doc_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/doc_patch.py -------------------------------------------------------------------------------- /python/sglang/test/few_shot_gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/few_shot_gsm8k.py -------------------------------------------------------------------------------- /python/sglang/test/get_logits_ut.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/get_logits_ut.py -------------------------------------------------------------------------------- /python/sglang/test/gsm8k_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/gsm8k_mixin.py -------------------------------------------------------------------------------- /python/sglang/test/kl_test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/kl_test_utils.py -------------------------------------------------------------------------------- /python/sglang/test/long_prompt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/long_prompt.txt -------------------------------------------------------------------------------- /python/sglang/test/longbench_v2/__init__.py: -------------------------------------------------------------------------------- 1 | """LongBench-v2 auxiliary utilities and validation scripts.""" 2 | -------------------------------------------------------------------------------- /python/sglang/test/mmmu_vlm_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/mmmu_vlm_mixin.py -------------------------------------------------------------------------------- /python/sglang/test/run_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/run_eval.py -------------------------------------------------------------------------------- /python/sglang/test/runners.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/runners.py -------------------------------------------------------------------------------- /python/sglang/test/send_one.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/send_one.py -------------------------------------------------------------------------------- /python/sglang/test/simple_eval_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/simple_eval_gpqa.py -------------------------------------------------------------------------------- /python/sglang/test/simple_eval_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/simple_eval_math.py -------------------------------------------------------------------------------- /python/sglang/test/simple_eval_mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/simple_eval_mgsm.py -------------------------------------------------------------------------------- /python/sglang/test/simple_eval_mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/simple_eval_mmlu.py -------------------------------------------------------------------------------- /python/sglang/test/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_activation.py -------------------------------------------------------------------------------- /python/sglang/test/test_block_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_block_fp8.py -------------------------------------------------------------------------------- /python/sglang/test/test_custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_custom_ops.py -------------------------------------------------------------------------------- /python/sglang/test/test_cutlass_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_cutlass_moe.py -------------------------------------------------------------------------------- /python/sglang/test/test_deepep_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_deepep_utils.py -------------------------------------------------------------------------------- /python/sglang/test/test_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_layernorm.py -------------------------------------------------------------------------------- /python/sglang/test/test_marlin_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_marlin_moe.py -------------------------------------------------------------------------------- /python/sglang/test/test_marlin_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_marlin_utils.py -------------------------------------------------------------------------------- /python/sglang/test/test_programs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_programs.py -------------------------------------------------------------------------------- /python/sglang/test/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/test/test_utils.py -------------------------------------------------------------------------------- /python/sglang/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/python/sglang/utils.py -------------------------------------------------------------------------------- /python/sglang/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.5.6" 2 | -------------------------------------------------------------------------------- /scripts/check_vram_clear.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/check_vram_clear.sh -------------------------------------------------------------------------------- /scripts/ci/amd_ci_exec.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/amd_ci_exec.sh -------------------------------------------------------------------------------- /scripts/ci/amd_ci_install_dependency.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/amd_ci_install_dependency.sh -------------------------------------------------------------------------------- /scripts/ci/amd_ci_start_container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/amd_ci_start_container.sh -------------------------------------------------------------------------------- /scripts/ci/ci_install_deepep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/ci_install_deepep.sh -------------------------------------------------------------------------------- /scripts/ci/ci_install_dependency.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/ci_install_dependency.sh -------------------------------------------------------------------------------- /scripts/ci/ci_install_rust.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/ci_install_rust.sh -------------------------------------------------------------------------------- /scripts/ci/cleanup_hf_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/cleanup_hf_cache.py -------------------------------------------------------------------------------- /scripts/ci/npu_ci_install_dependency.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/npu_ci_install_dependency.sh -------------------------------------------------------------------------------- /scripts/ci/npu_log_print.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/npu_log_print.sh -------------------------------------------------------------------------------- /scripts/ci/prepare_runner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/prepare_runner.sh -------------------------------------------------------------------------------- /scripts/ci/publish_traces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/publish_traces.py -------------------------------------------------------------------------------- /scripts/ci/slash_command_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/slash_command_handler.py -------------------------------------------------------------------------------- /scripts/ci/test_rccl_multi_gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci/test_rccl_multi_gpu.py -------------------------------------------------------------------------------- /scripts/ci_monitor/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci_monitor/README.md -------------------------------------------------------------------------------- /scripts/ci_monitor/ci_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci_monitor/ci_analyzer.py -------------------------------------------------------------------------------- /scripts/ci_monitor/ci_analyzer_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ci_monitor/ci_analyzer_perf.py -------------------------------------------------------------------------------- /scripts/code_sync/copy_from_oss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/code_sync/copy_from_oss.py -------------------------------------------------------------------------------- /scripts/code_sync/copy_to_oss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/code_sync/copy_to_oss.py -------------------------------------------------------------------------------- /scripts/code_sync/guideline.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/code_sync/guideline.md -------------------------------------------------------------------------------- /scripts/code_sync/install_github_cli.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/code_sync/install_github_cli.sh -------------------------------------------------------------------------------- /scripts/convert_otel_2_perfetto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/convert_otel_2_perfetto.py -------------------------------------------------------------------------------- /scripts/ensure_vram_clear.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/ensure_vram_clear.sh -------------------------------------------------------------------------------- /scripts/export_deepseek_nextn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/export_deepseek_nextn.py -------------------------------------------------------------------------------- /scripts/killall_sglang.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/killall_sglang.sh -------------------------------------------------------------------------------- /scripts/playground/bench_speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/bench_speculative.py -------------------------------------------------------------------------------- /scripts/playground/load_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/load_tokenizer.py -------------------------------------------------------------------------------- /scripts/playground/lora/analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/lora/analyzer.py -------------------------------------------------------------------------------- /scripts/playground/lora/lora_hf_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/lora/lora_hf_play.py -------------------------------------------------------------------------------- /scripts/playground/reference_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/reference_hf.py -------------------------------------------------------------------------------- /scripts/playground/router/test_tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/router/test_tree.py -------------------------------------------------------------------------------- /scripts/playground/router/tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/playground/router/tree.py -------------------------------------------------------------------------------- /scripts/release/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/release/README.md -------------------------------------------------------------------------------- /scripts/release/bump_kernel_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/release/bump_kernel_version.py -------------------------------------------------------------------------------- /scripts/release/bump_sglang_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/release/bump_sglang_version.py -------------------------------------------------------------------------------- /scripts/release/commit_and_pr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/release/commit_and_pr.sh -------------------------------------------------------------------------------- /scripts/release/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/release/test_utils.py -------------------------------------------------------------------------------- /scripts/release/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/release/utils.py -------------------------------------------------------------------------------- /scripts/update_kernel_whl_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/update_kernel_whl_index.py -------------------------------------------------------------------------------- /scripts/version_branch_to_tag.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/scripts/version_branch_to_tag.sh -------------------------------------------------------------------------------- /sgl-kernel/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/.clang-format -------------------------------------------------------------------------------- /sgl-kernel/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/CMakeLists.txt -------------------------------------------------------------------------------- /sgl-kernel/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/LICENSE -------------------------------------------------------------------------------- /sgl-kernel/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/Makefile -------------------------------------------------------------------------------- /sgl-kernel/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/README.md -------------------------------------------------------------------------------- /sgl-kernel/THIRDPARTYNOTICES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/THIRDPARTYNOTICES.txt -------------------------------------------------------------------------------- /sgl-kernel/analyze_whl_kernel_sizes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/analyze_whl_kernel_sizes.py -------------------------------------------------------------------------------- /sgl-kernel/benchmark/bench_fp4_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/benchmark/bench_fp4_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/benchmark/bench_fp8_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/benchmark/bench_fp8_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/benchmark/bench_int8_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/benchmark/bench_int8_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/benchmark/bench_mrope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/benchmark/bench_mrope.py -------------------------------------------------------------------------------- /sgl-kernel/benchmark/bench_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/benchmark/bench_rmsnorm.py -------------------------------------------------------------------------------- /sgl-kernel/benchmark/bench_sum_scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/benchmark/bench_sum_scale.py -------------------------------------------------------------------------------- /sgl-kernel/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/build.sh -------------------------------------------------------------------------------- /sgl-kernel/cmake/flashmla.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/cmake/flashmla.cmake -------------------------------------------------------------------------------- /sgl-kernel/cmake/utils.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/cmake/utils.cmake -------------------------------------------------------------------------------- /sgl-kernel/csrc/attention/cascade.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/attention/cascade.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/common_extension.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/common_extension.cc -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/CMakeLists.txt -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/activation.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/bmm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/bmm.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/common.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/decode.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/decode.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/extend.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/extend.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/gemm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/gemm.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/gemm.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/gemm_fp8.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/gemm_fp8.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/gemm_int8.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/gemm_int8.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/interface.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/interface.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/mamba/conv.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/mamba/conv.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/mamba/fla.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/mamba/fla.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/model/qwen3.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/model/qwen3.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/moe.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/moe.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/moe_fp8.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/moe_fp8.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/moe_int8.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/moe_int8.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/norm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/norm.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/numa_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/numa_utils.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/qkv_proj.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/qkv_proj.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/rope.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/rope.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/shm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/shm.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/shm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/shm.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/topk.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/topk.cpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/vec.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/vec.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/cpu/vec_pack.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/cpu/vec_pack.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/cast.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/cast.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/copy.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/copy.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/pos_enc.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/pos_enc.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/pos_enc.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/pos_enc.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/rope.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/rope.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/topk.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/topk.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/elementwise/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/elementwise/utils.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/flash_extension.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/flash_extension.cc -------------------------------------------------------------------------------- /sgl-kernel/csrc/flashmla_extension.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/flashmla_extension.cc -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/awq_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/awq_kernel.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/bmm_fp8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/bmm_fp8.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/gptq/compat.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/gptq/compat.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/gptq/qdq_2.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/gptq/qdq_2.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/gptq/qdq_3.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/gptq/qdq_3.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/gptq/qdq_4.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/gptq/qdq_4.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/gptq/qdq_8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/gptq/qdq_8.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/gptq/qdq_util.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/gptq/qdq_util.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/marlin/dequant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/marlin/dequant.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/marlin/kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/marlin/kernel.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/marlin/marlin.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/marlin/marlin.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/math.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/math.hpp -------------------------------------------------------------------------------- /sgl-kernel/csrc/gemm/nvfp4_quant.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/gemm/nvfp4_quant.cuh -------------------------------------------------------------------------------- /sgl-kernel/csrc/kvcacheio/transfer.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/kvcacheio/transfer.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/mamba/causal_conv1d.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/mamba/causal_conv1d.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/mamba/causal_conv1d.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/mamba/causal_conv1d.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/memory/store.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/memory/store.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/moe/moe_align_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/moe/moe_align_kernel.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/moe/moe_fused_gate.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/moe/moe_fused_gate.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/moe/moe_sum.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/moe/moe_sum.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/moe/moe_sum_reduce.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/moe/moe_sum_reduce.cu -------------------------------------------------------------------------------- /sgl-kernel/csrc/spatial/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/spatial/cuda_utils.h -------------------------------------------------------------------------------- /sgl-kernel/csrc/spatial_extension.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/spatial_extension.cc -------------------------------------------------------------------------------- /sgl-kernel/csrc/speculative/packbit.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/csrc/speculative/packbit.cu -------------------------------------------------------------------------------- /sgl-kernel/include/hip/hip_math_def.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/include/hip/hip_math_def.h -------------------------------------------------------------------------------- /sgl-kernel/include/hip/hip_vec_dtypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/include/hip/hip_vec_dtypes.h -------------------------------------------------------------------------------- /sgl-kernel/include/scalar_type.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/include/scalar_type.hpp -------------------------------------------------------------------------------- /sgl-kernel/include/sgl_kernel_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/include/sgl_kernel_ops.h -------------------------------------------------------------------------------- /sgl-kernel/include/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/include/utils.h -------------------------------------------------------------------------------- /sgl-kernel/kernel-runner-setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/kernel-runner-setup.sh -------------------------------------------------------------------------------- /sgl-kernel/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/pyproject.toml -------------------------------------------------------------------------------- /sgl-kernel/pyproject_cpu.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/pyproject_cpu.toml -------------------------------------------------------------------------------- /sgl-kernel/pyproject_rocm.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/pyproject_rocm.toml -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/python/sgl_kernel/gemm.py -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/grammar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/python/sgl_kernel/grammar.py -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/python/sgl_kernel/mamba.py -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/python/sgl_kernel/moe.py -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/testing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/top_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/python/sgl_kernel/top_k.py -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/python/sgl_kernel/utils.py -------------------------------------------------------------------------------- /sgl-kernel/python/sgl_kernel/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.18.post3" 2 | -------------------------------------------------------------------------------- /sgl-kernel/rename_wheels.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/rename_wheels.sh -------------------------------------------------------------------------------- /sgl-kernel/setup_rocm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/setup_rocm.py -------------------------------------------------------------------------------- /sgl-kernel/tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/conftest.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_activation.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_awq_dequant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_awq_dequant.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_bmm_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_bmm_fp8.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_copy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_copy.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_cutlass_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_cutlass_mla.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_flashmla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_flashmla.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_fp4_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_fp4_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_fp4_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_fp4_quantize.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_fp8_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_fp8_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_gguf.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_gptq_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_gptq_kernel.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_hadamard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_hadamard.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_int8_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_int8_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_kvcacheio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_kvcacheio.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_marlin_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_marlin_gemm.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_merge_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_merge_state.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_moe_align.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_moe_align.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_mscclpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_mscclpp.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_norm.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_sampling.py -------------------------------------------------------------------------------- /sgl-kernel/tests/test_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/test_topk.py -------------------------------------------------------------------------------- /sgl-kernel/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-kernel/tests/utils.py -------------------------------------------------------------------------------- /sgl-model-gateway/.cargo/config.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/.cargo/config.toml -------------------------------------------------------------------------------- /sgl-model-gateway/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/Cargo.toml -------------------------------------------------------------------------------- /sgl-model-gateway/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /sgl-model-gateway/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/Makefile -------------------------------------------------------------------------------- /sgl-model-gateway/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/README.md -------------------------------------------------------------------------------- /sgl-model-gateway/bindings/python/sglang_router/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.3" 2 | -------------------------------------------------------------------------------- /sgl-model-gateway/build.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/build.rs -------------------------------------------------------------------------------- /sgl-model-gateway/py_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/py_test/__init__.py -------------------------------------------------------------------------------- /sgl-model-gateway/py_test/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/py_test/conftest.py -------------------------------------------------------------------------------- /sgl-model-gateway/py_test/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | """Shared fixtures for router integration tests.""" 2 | -------------------------------------------------------------------------------- /sgl-model-gateway/py_test/integration_mock/__init__.py: -------------------------------------------------------------------------------- 1 | """Integration test package for the router.""" 2 | -------------------------------------------------------------------------------- /sgl-model-gateway/py_test/integration_mock/load_balancing/__init__.py: -------------------------------------------------------------------------------- 1 | """Load balancing integration tests.""" 2 | -------------------------------------------------------------------------------- /sgl-model-gateway/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/pytest.ini -------------------------------------------------------------------------------- /sgl-model-gateway/rustfmt.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/rustfmt.toml -------------------------------------------------------------------------------- /sgl-model-gateway/src/app_context.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/app_context.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/config/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/config/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/config/types.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/config/types.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/core/error.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/core/error.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/core/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/core/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/core/retry.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/core/retry.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/core/worker.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/core/worker.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/lib.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/main.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/main.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/mcp/config.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/mcp/config.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/mcp/error.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/mcp/error.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/mcp/manager.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/mcp/manager.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/mcp/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/mcp/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/mcp/oauth.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/mcp/oauth.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/mcp/proxy.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/mcp/proxy.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/middleware.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/middleware.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/policies/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/policies/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/routers/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/routers/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/server.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/server.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/version.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/version.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/README.md -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/config.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/config.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/errors.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/errors.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/module.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/module.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/route.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/route.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/runtime.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/runtime.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/spec.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/spec.rs -------------------------------------------------------------------------------- /sgl-model-gateway/src/wasm/types.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/src/wasm/types.rs -------------------------------------------------------------------------------- /sgl-model-gateway/tests/common/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/tests/common/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/tests/mcp_test.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/tests/mcp_test.rs -------------------------------------------------------------------------------- /sgl-model-gateway/tests/spec/mod.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/tests/spec/mod.rs -------------------------------------------------------------------------------- /sgl-model-gateway/tests/spec_test.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/tests/spec_test.rs -------------------------------------------------------------------------------- /sgl-model-gateway/tests/wasm_test.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/sgl-model-gateway/tests/wasm_test.rs -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/README.md -------------------------------------------------------------------------------- /test/lora_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/lora_utils.py -------------------------------------------------------------------------------- /test/manual/cpu/test_comm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/cpu/test_comm.py -------------------------------------------------------------------------------- /test/manual/ep/test_eplb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/ep/test_eplb.py -------------------------------------------------------------------------------- /test/manual/ep/test_moe_deepep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/ep/test_moe_deepep.py -------------------------------------------------------------------------------- /test/manual/lora/test_lora_llama4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/lora/test_lora_llama4.py -------------------------------------------------------------------------------- /test/manual/models/test_mtp_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/models/test_mtp_models.py -------------------------------------------------------------------------------- /test/manual/nightly/test_vlms_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/nightly/test_vlms_perf.py -------------------------------------------------------------------------------- /test/manual/quant/test_fp8_kvcache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/quant/test_fp8_kvcache.py -------------------------------------------------------------------------------- /test/manual/test_custom_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_custom_allreduce.py -------------------------------------------------------------------------------- /test/manual/test_double_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_double_sparsity.py -------------------------------------------------------------------------------- /test/manual/test_fim_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_fim_completion.py -------------------------------------------------------------------------------- /test/manual/test_health_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_health_check.py -------------------------------------------------------------------------------- /test/manual/test_kv_events.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_kv_events.py -------------------------------------------------------------------------------- /test/manual/test_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_logprobs.py -------------------------------------------------------------------------------- /test/manual/test_lora_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_lora_ops.py -------------------------------------------------------------------------------- /test/manual/test_mla_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_mla_tp.py -------------------------------------------------------------------------------- /test/manual/test_modelopt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_modelopt.py -------------------------------------------------------------------------------- /test/manual/test_mscclpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_mscclpp.py -------------------------------------------------------------------------------- /test/manual/test_quick_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_quick_allreduce.py -------------------------------------------------------------------------------- /test/manual/test_sagemaker_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_sagemaker_server.py -------------------------------------------------------------------------------- /test/manual/test_schedule_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_schedule_policy.py -------------------------------------------------------------------------------- /test/manual/test_session_control.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_session_control.py -------------------------------------------------------------------------------- /test/manual/test_tokenizer_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_tokenizer_manager.py -------------------------------------------------------------------------------- /test/manual/test_torch_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_torch_backend.py -------------------------------------------------------------------------------- /test/manual/test_torch_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_torch_tp.py -------------------------------------------------------------------------------- /test/manual/test_tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_tracing.py -------------------------------------------------------------------------------- /test/manual/test_triton_moe_wna16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_triton_moe_wna16.py -------------------------------------------------------------------------------- /test/manual/test_two_batch_overlap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_two_batch_overlap.py -------------------------------------------------------------------------------- /test/manual/test_vertex_endpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_vertex_endpoint.py -------------------------------------------------------------------------------- /test/manual/test_vlm_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_vlm_accuracy.py -------------------------------------------------------------------------------- /test/manual/test_weight_version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/manual/test_weight_version.py -------------------------------------------------------------------------------- /test/nightly/nightly_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/nightly_utils.py -------------------------------------------------------------------------------- /test/nightly/test_cpp_radix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_cpp_radix_cache.py -------------------------------------------------------------------------------- /test/nightly/test_deepseek_v32_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_deepseek_v32_tp.py -------------------------------------------------------------------------------- /test/nightly/test_encoder_dp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_encoder_dp.py -------------------------------------------------------------------------------- /test/nightly/test_fp4_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_fp4_moe.py -------------------------------------------------------------------------------- /test/nightly/test_glm_4_6_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_glm_4_6_perf.py -------------------------------------------------------------------------------- /test/nightly/test_lora_openai_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_lora_openai_api.py -------------------------------------------------------------------------------- /test/nightly/test_lora_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_lora_qwen3.py -------------------------------------------------------------------------------- /test/nightly/test_lora_radix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_lora_radix_cache.py -------------------------------------------------------------------------------- /test/nightly/test_minimax_m2_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_minimax_m2_perf.py -------------------------------------------------------------------------------- /test/nightly/test_nsa_indexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_nsa_indexer.py -------------------------------------------------------------------------------- /test/nightly/test_qwen3_235b_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_qwen3_235b_perf.py -------------------------------------------------------------------------------- /test/nightly/test_text_models_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_text_models_perf.py -------------------------------------------------------------------------------- /test/nightly/test_vlms_mmmu_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_vlms_mmmu_eval.py -------------------------------------------------------------------------------- /test/nightly/test_vlms_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/nightly/test_vlms_perf.py -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_mode = auto 3 | -------------------------------------------------------------------------------- /test/registered/test_srt_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/registered/test_srt_backend.py -------------------------------------------------------------------------------- /test/run_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/run_suite.py -------------------------------------------------------------------------------- /test/run_suite_nightly.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/run_suite_nightly.py -------------------------------------------------------------------------------- /test/srt/ascend/test_ascend_deepep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/ascend/test_ascend_deepep.py -------------------------------------------------------------------------------- /test/srt/configs/deepseek_v3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/configs/deepseek_v3.yaml -------------------------------------------------------------------------------- /test/srt/configs/llama_405b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/configs/llama_405b.yaml -------------------------------------------------------------------------------- /test/srt/configs/random_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/configs/random_config.yaml -------------------------------------------------------------------------------- /test/srt/configs/sharegpt_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/configs/sharegpt_config.yaml -------------------------------------------------------------------------------- /test/srt/cpu/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_activation.py -------------------------------------------------------------------------------- /test/srt/cpu/test_binding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_binding.py -------------------------------------------------------------------------------- /test/srt/cpu/test_causal_conv1d.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_causal_conv1d.py -------------------------------------------------------------------------------- /test/srt/cpu/test_cpu_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_cpu_graph.py -------------------------------------------------------------------------------- /test/srt/cpu/test_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_decode.py -------------------------------------------------------------------------------- /test/srt/cpu/test_extend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_extend.py -------------------------------------------------------------------------------- /test/srt/cpu/test_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_gemm.py -------------------------------------------------------------------------------- /test/srt/cpu/test_mamba.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_mamba.py -------------------------------------------------------------------------------- /test/srt/cpu/test_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_mla.py -------------------------------------------------------------------------------- /test/srt/cpu/test_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_moe.py -------------------------------------------------------------------------------- /test/srt/cpu/test_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_norm.py -------------------------------------------------------------------------------- /test/srt/cpu/test_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_qwen3.py -------------------------------------------------------------------------------- /test/srt/cpu/test_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_rope.py -------------------------------------------------------------------------------- /test/srt/cpu/test_shared_expert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_shared_expert.py -------------------------------------------------------------------------------- /test/srt/cpu/test_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/test_topk.py -------------------------------------------------------------------------------- /test/srt/cpu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/cpu/utils.py -------------------------------------------------------------------------------- /test/srt/ep/test_deepep_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/ep/test_deepep_large.py -------------------------------------------------------------------------------- /test/srt/ep/test_deepep_small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/ep/test_deepep_small.py -------------------------------------------------------------------------------- /test/srt/ep/test_moe_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/ep/test_moe_ep.py -------------------------------------------------------------------------------- /test/srt/experiment_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/experiment_runner.py -------------------------------------------------------------------------------- /test/srt/lora/test_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/lora/test_lora.py -------------------------------------------------------------------------------- /test/srt/lora/test_lora_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/lora/test_lora_backend.py -------------------------------------------------------------------------------- /test/srt/lora/test_lora_eviction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/lora/test_lora_eviction.py -------------------------------------------------------------------------------- /test/srt/lora/test_lora_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/lora/test_lora_tp.py -------------------------------------------------------------------------------- /test/srt/lora/test_lora_update.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/lora/test_lora_update.py -------------------------------------------------------------------------------- /test/srt/models/compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/models/compare.py -------------------------------------------------------------------------------- /test/srt/models/test_qwen_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/models/test_qwen_models.py -------------------------------------------------------------------------------- /test/srt/models/test_reward_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/models/test_reward_models.py -------------------------------------------------------------------------------- /test/srt/models/test_vlm_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/models/test_vlm_models.py -------------------------------------------------------------------------------- /test/srt/openai_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/srt/openai_server/basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/srt/openai_server/features/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/srt/openai_server/function_call/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/srt/openai_server/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/srt/parse_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/parse_results.py -------------------------------------------------------------------------------- /test/srt/quant/test_autoround.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/quant/test_autoround.py -------------------------------------------------------------------------------- /test/srt/quant/test_awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/quant/test_awq.py -------------------------------------------------------------------------------- /test/srt/quant/test_awq_dequant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/quant/test_awq_dequant.py -------------------------------------------------------------------------------- /test/srt/quant/test_block_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/quant/test_block_int8.py -------------------------------------------------------------------------------- /test/srt/quant/test_fp8_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/quant/test_fp8_kernel.py -------------------------------------------------------------------------------- /test/srt/quant/test_int8_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/quant/test_int8_kernel.py -------------------------------------------------------------------------------- /test/srt/rl/test_fp32_lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/rl/test_fp32_lm_head.py -------------------------------------------------------------------------------- /test/srt/run_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/run_suite.py -------------------------------------------------------------------------------- /test/srt/test_abort.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_abort.py -------------------------------------------------------------------------------- /test/srt/test_bench_one_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_bench_one_batch.py -------------------------------------------------------------------------------- /test/srt/test_bench_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_bench_serving.py -------------------------------------------------------------------------------- /test/srt/test_bnb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_bnb.py -------------------------------------------------------------------------------- /test/srt/test_build_eagle_tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_build_eagle_tree.py -------------------------------------------------------------------------------- /test/srt/test_chunked_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_chunked_prefill.py -------------------------------------------------------------------------------- /test/srt/test_constrained_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_constrained_decoding.py -------------------------------------------------------------------------------- /test/srt/test_create_kvindices.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_create_kvindices.py -------------------------------------------------------------------------------- /test/srt/test_cutedsl_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_cutedsl_moe.py -------------------------------------------------------------------------------- /test/srt/test_data_parallelism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_data_parallelism.py -------------------------------------------------------------------------------- /test/srt/test_deepseek_v32_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_deepseek_v32_basic.py -------------------------------------------------------------------------------- /test/srt/test_deepseek_v32_mtp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_deepseek_v32_mtp.py -------------------------------------------------------------------------------- /test/srt/test_deepseek_v3_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_deepseek_v3_basic.py -------------------------------------------------------------------------------- /test/srt/test_deepseek_v3_fp4_4gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_deepseek_v3_fp4_4gpu.py -------------------------------------------------------------------------------- /test/srt/test_deepseek_v3_mtp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_deepseek_v3_mtp.py -------------------------------------------------------------------------------- /test/srt/test_deterministic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_deterministic.py -------------------------------------------------------------------------------- /test/srt/test_disaggregation_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_disaggregation_basic.py -------------------------------------------------------------------------------- /test/srt/test_disaggregation_pp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_disaggregation_pp.py -------------------------------------------------------------------------------- /test/srt/test_dp_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_dp_attention.py -------------------------------------------------------------------------------- /test/srt/test_eagle_dp_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_eagle_dp_attention.py -------------------------------------------------------------------------------- /test/srt/test_eagle_infer_a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_eagle_infer_a.py -------------------------------------------------------------------------------- /test/srt/test_eagle_infer_b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_eagle_infer_b.py -------------------------------------------------------------------------------- /test/srt/test_eagle_infer_beta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_eagle_infer_beta.py -------------------------------------------------------------------------------- /test/srt/test_eval_accuracy_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_eval_accuracy_large.py -------------------------------------------------------------------------------- /test/srt/test_eval_fp8_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_eval_fp8_accuracy.py -------------------------------------------------------------------------------- /test/srt/test_external_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_external_models.py -------------------------------------------------------------------------------- /test/srt/test_fa3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_fa3.py -------------------------------------------------------------------------------- /test/srt/test_flash_attention_4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_flash_attention_4.py -------------------------------------------------------------------------------- /test/srt/test_flashmla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_flashmla.py -------------------------------------------------------------------------------- /test/srt/test_fp8_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_fp8_utils.py -------------------------------------------------------------------------------- /test/srt/test_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_fused_moe.py -------------------------------------------------------------------------------- /test/srt/test_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_gguf.py -------------------------------------------------------------------------------- /test/srt/test_gpt_oss_1gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_gpt_oss_1gpu.py -------------------------------------------------------------------------------- /test/srt/test_gpt_oss_4gpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_gpt_oss_4gpu.py -------------------------------------------------------------------------------- /test/srt/test_gpt_oss_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_gpt_oss_common.py -------------------------------------------------------------------------------- /test/srt/test_gptqmodel_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_gptqmodel_dynamic.py -------------------------------------------------------------------------------- /test/srt/test_harmony_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_harmony_parser.py -------------------------------------------------------------------------------- /test/srt/test_hidden_states.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_hidden_states.py -------------------------------------------------------------------------------- /test/srt/test_hybrid_attn_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_hybrid_attn_backend.py -------------------------------------------------------------------------------- /test/srt/test_input_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_input_embeddings.py -------------------------------------------------------------------------------- /test/srt/test_io_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_io_struct.py -------------------------------------------------------------------------------- /test/srt/test_jinja_template_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_jinja_template_utils.py -------------------------------------------------------------------------------- /test/srt/test_llama31_fp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_llama31_fp4.py -------------------------------------------------------------------------------- /test/srt/test_local_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_local_attn.py -------------------------------------------------------------------------------- /test/srt/test_mamba_unittest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mamba_unittest.py -------------------------------------------------------------------------------- /test/srt/test_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_metrics.py -------------------------------------------------------------------------------- /test/srt/test_metrics_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_metrics_utils.py -------------------------------------------------------------------------------- /test/srt/test_mistral_large3_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mistral_large3_basic.py -------------------------------------------------------------------------------- /test/srt/test_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mla.py -------------------------------------------------------------------------------- /test/srt/test_mla_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mla_deepseek_v3.py -------------------------------------------------------------------------------- /test/srt/test_mla_flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mla_flashinfer.py -------------------------------------------------------------------------------- /test/srt/test_mla_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mla_fp8.py -------------------------------------------------------------------------------- /test/srt/test_mla_int8_deepseek_v3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_mla_int8_deepseek_v3.py -------------------------------------------------------------------------------- /test/srt/test_model_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_model_hooks.py -------------------------------------------------------------------------------- /test/srt/test_modelopt_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_modelopt_export.py -------------------------------------------------------------------------------- /test/srt/test_modelopt_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_modelopt_loader.py -------------------------------------------------------------------------------- /test/srt/test_multi_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_multi_tokenizer.py -------------------------------------------------------------------------------- /test/srt/test_no_chunked_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_no_chunked_prefill.py -------------------------------------------------------------------------------- /test/srt/test_no_overlap_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_no_overlap_scheduler.py -------------------------------------------------------------------------------- /test/srt/test_original_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_original_logprobs.py -------------------------------------------------------------------------------- /test/srt/test_page_size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_page_size.py -------------------------------------------------------------------------------- /test/srt/test_patch_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_patch_torch.py -------------------------------------------------------------------------------- /test/srt/test_penalty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_penalty.py -------------------------------------------------------------------------------- /test/srt/test_piecewise_cuda_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_piecewise_cuda_graph.py -------------------------------------------------------------------------------- /test/srt/test_pp_single_node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_pp_single_node.py -------------------------------------------------------------------------------- /test/srt/test_priority_scheduling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_priority_scheduling.py -------------------------------------------------------------------------------- /test/srt/test_profile_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_profile_merger.py -------------------------------------------------------------------------------- /test/srt/test_profile_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_profile_v2.py -------------------------------------------------------------------------------- /test/srt/test_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_quantization.py -------------------------------------------------------------------------------- /test/srt/test_radix_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_radix_attention.py -------------------------------------------------------------------------------- /test/srt/test_radix_cache_unit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_radix_cache_unit.py -------------------------------------------------------------------------------- /test/srt/test_reasoning_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_reasoning_parser.py -------------------------------------------------------------------------------- /test/srt/test_retract_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_retract_decode.py -------------------------------------------------------------------------------- /test/srt/test_rope_rocm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_rope_rocm.py -------------------------------------------------------------------------------- /test/srt/test_score_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_score_api.py -------------------------------------------------------------------------------- /test/srt/test_server_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_server_args.py -------------------------------------------------------------------------------- /test/srt/test_skip_tokenizer_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_skip_tokenizer_init.py -------------------------------------------------------------------------------- /test/srt/test_speculative_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_speculative_registry.py -------------------------------------------------------------------------------- /test/srt/test_srt_endpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_srt_endpoint.py -------------------------------------------------------------------------------- /test/srt/test_srt_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_srt_engine.py -------------------------------------------------------------------------------- /test/srt/test_start_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_start_profile.py -------------------------------------------------------------------------------- /test/srt/test_swa_unittest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_swa_unittest.py -------------------------------------------------------------------------------- /test/srt/test_torch_compile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_torch_compile.py -------------------------------------------------------------------------------- /test/srt/test_torch_compile_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_torch_compile_moe.py -------------------------------------------------------------------------------- /test/srt/test_torchao.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_torchao.py -------------------------------------------------------------------------------- /test/srt/test_triton_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_triton_fused_moe.py -------------------------------------------------------------------------------- /test/srt/test_utils_update_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_utils_update_weights.py -------------------------------------------------------------------------------- /test/srt/test_video_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_video_utils.py -------------------------------------------------------------------------------- /test/srt/test_vlm_input_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgl-project/sglang/HEAD/test/srt/test_vlm_input_format.py --------------------------------------------------------------------------------