├── .github
    ├── ISSUE_TEMPLATE
    │   └── bug_report.md
    └── workflows
    │   ├── docker-publish.yml
    │   └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── assets
    ├── att.gif
    ├── lightllm.drawio.png
    ├── logo.png
    └── logo_new.png
├── benchmark.md
├── demos
    ├── qa_server
    │   ├── __init__.py
    │   ├── chat_server.py
    │   ├── qabot.py
    │   └── templates
    │   │   └── chat.html
    └── readme.txt
├── docker
    ├── Dockerfile
    ├── Dockerfile.deepep
    ├── Dockerfile.nixl
    ├── Dockerfile.nixl.deepep
    ├── cuda_version_12.6.1
    │   ├── Dockerfile
    │   ├── Dockerfile.deepep
    │   ├── Dockerfile.nixl
    │   └── Dockerfile.nixl.deepep
    └── cuda_version_12.8.0
    │   ├── Dockerfile
    │   ├── Dockerfile.deepep
    │   ├── Dockerfile.nixl
    │   ├── Dockerfile.nixl.deepep
    │   └── Dockerfile.nixl.deepep.cache
├── docs
    ├── CN
    │   ├── .readthedocs.yaml
    │   ├── Makefile
    │   ├── README.md
    │   ├── make.bat
    │   ├── rebuild.sh
    │   ├── requirements-docs.txt
    │   └── source
    │   │   ├── assets
    │   │       ├── lightllm
    │   │       │   ├── ER1.png
    │   │       │   ├── ER2.png
    │   │       │   ├── ER3.png
    │   │       │   ├── ER4.png
    │   │       │   ├── HttpServer.png
    │   │       │   ├── Performance.png
    │   │       │   ├── Performance2.png
    │   │       │   ├── Router.png
    │   │       │   ├── Visual_Server.png
    │   │       │   ├── arch.png
    │   │       │   ├── backend.png
    │   │       │   └── token_attn.gif
    │   │       └── logos
    │   │       │   └── lightllm-logo.png
    │   │   ├── conf.py
    │   │   ├── framework
    │   │       ├── framework.rst
    │   │       ├── router.rst
    │   │       └── token_attention.rst
    │   │   ├── getting_started
    │   │       ├── benchmark.rst
    │   │       ├── installation.rst
    │   │       └── quickstart.rst
    │   │   ├── index.rst
    │   │   ├── models
    │   │       ├── add_new_model.md
    │   │       └── supported_models.rst
    │   │   └── tutorial
    │   │       ├── api_param.rst
    │   │       ├── api_server_args_zh.rst
    │   │       ├── deepseek_deployment.rst
    │   │       ├── multi_level_cache_deployment.rst
    │   │       ├── multimodal.rst
    │   │       ├── openai.rst
    │   │       └── reward_model.rst
    └── EN
    │   ├── .readthedocs.yaml
    │   ├── Makefile
    │   ├── README.md
    │   ├── make.bat
    │   ├── rebuild.sh
    │   ├── requirements-docs.txt
    │   └── source
    │       ├── assets
    │           ├── lightllm
    │           │   ├── ER1.png
    │           │   ├── ER2.png
    │           │   ├── ER3.png
    │           │   ├── ER4.png
    │           │   ├── HttpServer.png
    │           │   ├── Performance.png
    │           │   ├── Performance2.png
    │           │   ├── Router.png
    │           │   ├── Visual_Server.png
    │           │   ├── arch.png
    │           │   ├── backend.png
    │           │   └── token_attn.gif
    │           └── logos
    │           │   └── lightllm-logo.png
    │       ├── conf.py
    │       ├── framework
    │           ├── framework.rst
    │           ├── router.rst
    │           └── token_attention.rst
    │       ├── getting_started
    │           ├── benchmark.rst
    │           ├── installation.rst
    │           └── quickstart.rst
    │       ├── index.rst
    │       ├── models
    │           ├── add_new_model.md
    │           └── supported_models.rst
    │       └── tutorial
    │           ├── api_param.rst
    │           ├── api_server_args_zh.rst
    │           ├── deepseek_deployment.rst
    │           ├── multi_level_cache_deployment.rst
    │           ├── multimodal.rst
    │           ├── openai.rst
    │           └── reward_model.rst
├── format.py
├── format_out
    ├── __init__.py
    ├── grammer
    │   ├── __init__.py
    │   ├── core.py
    │   ├── dpda.py
    │   ├── json.ebnf
    │   ├── test.sh
    │   ├── test0.py
    │   ├── test1.py
    │   ├── test2.py
    │   ├── test3.py
    │   ├── test4.py
    │   ├── test5.py
    │   └── test6.py
    └── impl.py
├── lightllm
    ├── __init__.py
    ├── common
    │   ├── __init__.py
    │   ├── all_kernel_configs
    │   │   ├── __init__.py
    │   │   ├── _fwd_kernel_flash_decode_diverse_stage1:v1
    │   │   │   ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json
    │   │   │   ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json
    │   │   │   ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json
    │   │   │   └── {block_seq=256,gqa_group_size=8,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json
    │   │   ├── bmm_scaled_fp8
    │   │   │   ├── {B=16,K=128,M=1,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=1,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=128,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=128,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=16,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=16,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=2,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=2,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=256,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=256,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=32,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=32,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=4,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=4,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=512,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=512,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=64,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=64,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=128,M=8,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=128,M=8,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=1,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=1,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=128,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=128,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=16,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=16,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=2,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=2,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=256,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=256,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=32,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=32,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=4,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=4,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=512,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=512,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=64,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {B=16,K=512,M=64,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {B=16,K=512,M=8,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   └── {B=16,K=512,M=8,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   ├── deepseek_v3_rotary_emb_kernel
    │   │   │   ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=128,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=16,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=32,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=64,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   └── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=8,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   ├── fp8_block_mm
    │   │   │   ├── {K=1024,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=1024,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=1152,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=1152,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=1536,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=1536,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=16384,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=16384,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=18432,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=18432,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=2048,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=2048,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=2304,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=2304,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=256,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=256,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=512,N=2048,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=512,N=2048,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=512,N=32768,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=512,N=32768,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=512,N=4096,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=512,N=4096,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=2304,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=2304,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=24576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=24576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=256,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=256,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=36864,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=36864,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=512,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=512,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=8072,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   └── {K=7168,N=8072,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json
    │   │   ├── grouped_moe_gemm_kernel
    │   │   │   ├── {K=128,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=128,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=1408,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=192,N=5120,expert_num=160,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H800.json
    │   │   │   ├── {K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_H800.json
    │   │   │   ├── {K=2048,N=192,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │   │   ├── {K=2048,N=2816,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=256,N=7168,expert_num=257,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=4096,N=192,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │   │   ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=512,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=5120,N=384,expert_num=160,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=true}_NVIDIA_H800.json
    │   │   │   ├── {K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H800.json
    │   │   │   ├── {K=7168,N=1024,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=1024,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=256,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=7168,N=256,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json
    │   │   │   ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=7168,N=512,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │   │   ├── {K=96,N=2048,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │   │   └── {K=96,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json
    │   │   ├── mla_decode_attentnion
    │   │   │   ├── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=128,q_rope_dim=64}_NVIDIA_H800.json
    │   │   │   ├── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=16,q_rope_dim=64}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=16,q_rope_dim=64}_NVIDIA_H200.json
    │   │   │   └── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=16,q_rope_dim=64}_NVIDIA_H800.json
    │   │   ├── moe_silu_and_mul_kernel
    │   │   │   ├── {N=1024,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=128,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=1408,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=192,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=2048,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=2304,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=256,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=4096,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   ├── {N=512,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   │   └── {N=8192,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │   └── moe_sum_reduce_kernel
    │   │   │   ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   ├── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json
    │   │   │   ├── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json
    │   │   │   └── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json
    │   ├── basemodel
    │   │   ├── __init__.py
    │   │   ├── basemodel.py
    │   │   ├── batch_objs.py
    │   │   ├── cuda_graph.py
    │   │   ├── infer_lock.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── base_layer_infer.py
    │   │   │   ├── cache_tensor_manager.py
    │   │   │   ├── post_layer_infer.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   ├── template
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── post_layer_infer_template.py
    │   │   │   │   ├── pre_layer_infer_template.py
    │   │   │   │   ├── transformer_layer_infer_cohere_template.py
    │   │   │   │   └── transformer_layer_infer_template.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── base_layer_weight.py
    │   │   │   ├── hf_load_utils.py
    │   │   │   ├── meta_weights
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── base_weight.py
    │   │   │   │   ├── fused_moe_weight_ep.py
    │   │   │   │   ├── fused_moe_weight_ep_redundancy.py
    │   │   │   │   ├── fused_moe_weight_tp.py
    │   │   │   │   ├── gpt_oss_fused_moe_weight_tp.py
    │   │   │   │   ├── mm_weight
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── colmm_weight.py
    │   │   │   │   │   ├── mm_factory.py
    │   │   │   │   │   ├── mm_slicer.py
    │   │   │   │   │   ├── mm_weight.py
    │   │   │   │   │   └── rowmm_weight.py
    │   │   │   │   └── norm_weight.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── multimodal_tokenizer.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── add_in_place.py
    │   │   │   ├── apply_penalty.py
    │   │   │   ├── apply_penalty_gpu_cache.py
    │   │   │   ├── bmm_scaled_fp8.py
    │   │   │   ├── copy_kv_index_to_req.py
    │   │   │   ├── dequantize_gemm_int4.py
    │   │   │   ├── dequantize_gemm_int8.py
    │   │   │   ├── destindex_copy_kv.py
    │   │   │   ├── destindex_copy_kv_fp8.py
    │   │   │   ├── fa3_utils.py
    │   │   │   ├── gather_token_id.py
    │   │   │   ├── gen_decode_params.py
    │   │   │   ├── gen_mtp_prefill_params.py
    │   │   │   ├── gen_prefill_params.py
    │   │   │   ├── gen_sampling_params.py
    │   │   │   ├── kv_cache_offload.py
    │   │   │   ├── mtp_utils.py
    │   │   │   ├── multimodal_emb.py
    │   │   │   ├── q_per_head_fp8_quant.py
    │   │   │   ├── quantize_gemm_int8.py
    │   │   │   ├── redundancy_topk_ids_repair.py
    │   │   │   └── sp_pad_copy.py
    │   ├── build_utils.py
    │   ├── cuda_wrapper.py
    │   ├── fused_moe
    │   │   ├── __init__.py
    │   │   ├── deepep_scatter_gather.py
    │   │   ├── grouped_fused_moe.py
    │   │   ├── grouped_fused_moe_ep.py
    │   │   ├── grouped_topk.py
    │   │   ├── moe_kernel_configs.py
    │   │   ├── moe_silu_and_mul.py
    │   │   ├── moe_silu_and_mul_config.py
    │   │   ├── moe_silu_and_mul_mix_quant_ep.py
    │   │   ├── moe_sum_recude_config.py
    │   │   ├── moe_sum_reduce.py
    │   │   ├── softmax_topk.py
    │   │   └── topk_select.py
    │   ├── infer_utils.py
    │   ├── kernel_config.py
    │   ├── kv_cache_mem_manager
    │   │   ├── __init__.py
    │   │   ├── calibration_fp8kv_mem_manager.py
    │   │   ├── deepseek2_fp8kv_mem_manager.py
    │   │   ├── deepseek2_mem_manager.py
    │   │   ├── export_calibration_mem_manager.py
    │   │   ├── int8kv_mem_manager.py
    │   │   ├── mem_manager.py
    │   │   ├── mem_utils.py
    │   │   ├── offline_fp8_quant_mem_manager.py
    │   │   ├── ppl_int4kv_mem_manager.py
    │   │   └── ppl_int8kv_mem_manager.py
    │   ├── kv_trans_kernel
    │   │   ├── __init__.py
    │   │   ├── kv_trans.py
    │   │   ├── kv_trans_v2.py
    │   │   └── nixl_kv_trans.py
    │   ├── quantization
    │   │   ├── __init__.py
    │   │   ├── awq_quant.py
    │   │   ├── deepgemm_quant.py
    │   │   ├── quantize_method.py
    │   │   ├── registry.py
    │   │   ├── torchao_quant.py
    │   │   ├── triton_quant
    │   │   │   ├── __init__.py
    │   │   │   ├── fp8
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── fp8act_quant_kernel.py
    │   │   │   │   ├── fp8w8a8_block_gemm_kernel.py
    │   │   │   │   ├── fp8w8a8_block_quant_kernel.py
    │   │   │   │   └── fp8w8a8_scaled_mm_per_token_kernel.py
    │   │   │   └── triton_quant.py
    │   │   └── w8a8_quant.py
    │   ├── req_manager.py
    │   └── triton_utils
    │   │   ├── __init__.py
    │   │   ├── autotune_kernel_configs
    │   │       ├── triton_3.3.1
    │   │       │   └── NVIDIA_H200
    │   │       │   │   ├── grouped_matmul:v1
    │   │       │   │       ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │       │   │       ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       ├── {K=256,N=7168,expert_num=257,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       ├── {K=384,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │       │   │       ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │       │   │       ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       ├── {K=4096,N=768,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │       │   │       ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       └── {K=7168,N=512,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=9,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │   ├── moe_sum_reduce:v1
    │   │       │   │       ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H200.json
    │   │       │   │       ├── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H200.json
    │   │       │   │       └── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=9}_NVIDIA_H200.json
    │   │       │   │   ├── rotary_emb_fwd:v1
    │   │       │   │       ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=128,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       └── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=16,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │   └── silu_and_mul_fwd:v1
    │   │       │   │       ├── {N=1536,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=18432,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=192,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=2048,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=2304,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=256,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       └── {N=384,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       ├── triton_3.4.0
    │   │       │   ├── NVIDIA_H20
    │   │       │   │   ├── grouped_matmul:v1
    │   │       │   │   │   ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H20.json
    │   │       │   │   │   ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H20.json
    │   │       │   │   │   ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H20.json
    │   │       │   │   │   └── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H20.json
    │   │       │   │   ├── moe_align_fused:v1
    │   │       │   │   │   └── {topk_num=8}_NVIDIA_H20.json
    │   │       │   │   ├── moe_sum_reduce:v1
    │   │       │   │   │   └── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H20.json
    │   │       │   │   └── silu_and_mul_fwd:v1
    │   │       │   │   │   ├── {N=1536,out_dtype=torch.bfloat16}_NVIDIA_H20.json
    │   │       │   │   │   └── {N=192,out_dtype=torch.bfloat16}_NVIDIA_H20.json
    │   │       │   └── NVIDIA_H200
    │   │       │   │   ├── grouped_matmul:v1
    │   │       │   │       ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │       │   │       ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       ├── {K=256,N=7168,expert_num=257,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json
    │   │       │   │       ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │       └── {K=7168,N=512,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=9,use_fp8_w8a8=true}_NVIDIA_H200.json
    │   │       │   │   ├── moe_align_fused:v1
    │   │       │   │       ├── {topk_num=8}_NVIDIA_H200.json
    │   │       │   │       └── {topk_num=9}_NVIDIA_H200.json
    │   │       │   │   ├── moe_sum_reduce:v1
    │   │       │   │       ├── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H200.json
    │   │       │   │       └── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=9}_NVIDIA_H200.json
    │   │       │   │   ├── rotary_emb_fwd:v1
    │   │       │   │       ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=128,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       └── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=16,dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │   └── silu_and_mul_fwd:v1
    │   │       │   │       ├── {N=18432,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=2048,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       ├── {N=2304,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       │   │       └── {N=256,out_dtype=torch.bfloat16}_NVIDIA_H200.json
    │   │       └── triton_3.5.1
    │   │       │   ├── NVIDIA_GeForce_RTX_4090_D
    │   │       │       └── fp8_scaled_mm_per_token:v3
    │   │       │       │   ├── {K=14336,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json
    │   │       │       │   ├── {K=4096,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json
    │   │       │       │   ├── {K=5120,N=2048,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json
    │   │       │       │   ├── {K=5120,N=28672,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json
    │   │       │       │   └── {K=5120,N=4096,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json
    │   │       │   └── NVIDIA_GeForce_RTX_5090
    │   │       │       ├── fp8_scaled_mm_per_token:v3
    │   │       │           ├── {K=13824,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=14336,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=1536,N=1536,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=1536,N=8960,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=4096,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=5120,N=13824,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=5120,N=2048,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=5120,N=28672,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=5120,N=4096,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=5120,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           ├── {K=8960,N=1536,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │           └── {N=14336,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │       │       └── silu_and_mul_fwd:v1
    │   │       │           └── {N=14336,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json
    │   │   └── autotuner.py
    ├── distributed
    │   ├── __init__.py
    │   ├── communication_op.py
    │   ├── custom_all_gather.py
    │   ├── custom_all_reduce.py
    │   ├── pynccl.py
    │   └── pynccl_wrapper.py
    ├── models
    │   ├── __init__.py
    │   ├── bloom
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── post_layer_infer.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── hf_load_utils.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── context_flashattention_nopad.py
    │   │   │   ├── layernorm.py
    │   │   │   ├── token_attention_nopad_att1.py
    │   │   │   ├── token_attention_nopad_reduceV.py
    │   │   │   ├── token_attention_nopad_softmax.py
    │   │   │   └── token_flashattention_nopad.py
    │   ├── chatglm2
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   └── rotary_emb.py
    │   ├── cohere
    │   │   ├── __init__.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── post_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernels
    │   │   │   ├── __init__.py
    │   │   │   ├── layernorm.py
    │   │   │   └── rotary_emb.py
    │   ├── deepseek2
    │   │   ├── __init__.py
    │   │   ├── flashattention_infer_struct.py
    │   │   ├── flashinfer_struct.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── context_flashattention_nopad.py
    │   │   │   ├── context_flashattention_nopad_fp8.py
    │   │   │   ├── context_flashattention_nopad_with_v.py
    │   │   │   ├── destindex_copy_kv.py
    │   │   │   ├── destindex_copy_kv_fp8.py
    │   │   │   ├── gqa_flash_decoding.py
    │   │   │   ├── gqa_flash_decoding_config.py
    │   │   │   ├── gqa_flash_decoding_fp8.py
    │   │   │   ├── gqa_flash_decoding_stage1.py
    │   │   │   ├── gqa_flash_decoding_stage1_fp8.py
    │   │   │   ├── gqa_flash_decoding_stage2.py
    │   │   │   ├── repack_kv_index.py
    │   │   │   ├── repeat_rope.py
    │   │   │   ├── rotary_emb.py
    │   │   │   ├── rotary_emb_config.py
    │   │   │   ├── sample_kv.py
    │   │   │   └── weight_dequant.py
    │   ├── deepseek_mtp
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── pre_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── pre_and_post_layer_weight.py
    │   │   └── model.py
    │   ├── gemma3
    │   │   ├── __init__.py
    │   │   ├── gemma3_visual.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── post_layer_infer.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── gemma_2b
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   └── gelu_and_mul.py
    │   ├── gpt_oss
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── internlm
    │   │   ├── __init__.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── internlm2
    │   │   ├── __init__.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── internlm2_reward
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── post_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── pre_and_post_layer_weight.py
    │   │   └── model.py
    │   ├── internvl
    │   │   ├── __init__.py
    │   │   ├── img_process.py
    │   │   ├── internvl_visual.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── pre_and_post_layer_weight.py
    │   │   └── model.py
    │   ├── llama
    │   │   ├── __init__.py
    │   │   ├── flashattention_infer_struct.py
    │   │   ├── flashinfer_struct.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── post_layer_infer.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── ds_load_utils.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   ├── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── context_flashattention_nopad.py
    │   │   │   ├── embedding.py
    │   │   │   ├── flash_decoding.py
    │   │   │   ├── flash_decoding_stage1.py
    │   │   │   ├── flash_decoding_stage2.py
    │   │   │   ├── gqa_decode_flashattention_nopad.py
    │   │   │   ├── gqa_flash_decoding.py
    │   │   │   ├── gqa_flash_decoding_stage1.py
    │   │   │   ├── gqa_flash_decoding_stage2.py
    │   │   │   ├── gqa_flash_decoding_vsm.py
    │   │   │   ├── ppl_fp16_flash_decoding.py
    │   │   │   ├── ppl_int4kv_copy_kv.py
    │   │   │   ├── ppl_int4kv_flash_decoding.py
    │   │   │   ├── ppl_int8kv_flash_decoding.py
    │   │   │   ├── ppl_int8kv_flash_decoding_diverse.py
    │   │   │   ├── ppl_int8kv_flash_decoding_diverse_stage1.py
    │   │   │   ├── ppl_int8kv_flash_decoding_diverse_stage3.py
    │   │   │   ├── ppl_quant_copy_kv.py
    │   │   │   ├── rmsnorm.py
    │   │   │   ├── rotary_emb.py
    │   │   │   ├── silu_and_mul.py
    │   │   │   ├── token_attention_nopad_att1.py
    │   │   │   ├── token_attention_nopad_reduceV.py
    │   │   │   ├── token_attention_nopad_softmax.py
    │   │   │   └── token_attention_softmax_and_reducev.py
    │   │   └── yarn_rotary_utils.py
    │   ├── llava
    │   │   ├── __init__.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── pre_and_post_layer_weight.py
    │   │   ├── llava_visual.py
    │   │   └── model.py
    │   ├── minicpm
    │   │   ├── __init__.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── mistral
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── context_flashattention_nopad.py
    │   │   │   ├── init_att_sliding_window_info.py
    │   │   │   ├── token_attention_nopad_att1.py
    │   │   │   ├── token_attention_nopad_reduceV.py
    │   │   │   └── token_attention_softmax_and_reducev.py
    │   ├── mixtral
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── _custom_ops.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── phi3
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── context_flashattention_nopad.py
    │   │   │   ├── destindex_copy_kv.py
    │   │   │   ├── flash_decoding.py
    │   │   │   ├── flash_decoding_stage1.py
    │   │   │   ├── flash_decoding_stage2.py
    │   │   │   └── rotary_emb.py
    │   ├── qwen
    │   │   ├── __init__.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── qwen2
    │   │   ├── __init__.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── qwen2_5_vl
    │   │   ├── __init__.py
    │   │   └── qwen2_5_visual.py
    │   ├── qwen2_reward
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── post_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── pre_and_post_layer_weight.py
    │   │   └── model.py
    │   ├── qwen2_vl
    │   │   ├── __init__.py
    │   │   ├── flashattention_infer_struct.py
    │   │   ├── infer_struct.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── model.py
    │   │   ├── qwen2_visual.py
    │   │   ├── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── mrope.py
    │   │   │   └── rotary_pos_emb.py
    │   │   └── vision_process.py
    │   ├── qwen3
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── qwen3_moe
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── qwen_vl
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── pre_layer_infer.py
    │   │   ├── model.py
    │   │   └── qwen_visual.py
    │   ├── registry.py
    │   ├── stablelm
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── starcoder
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── starcoder2
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   └── model.py
    │   ├── tarsier2
    │   │   ├── __init__.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   └── pre_and_post_layer_weight.py
    │   │   ├── model.py
    │   │   └── tarsier2_visual.py
    │   ├── vit
    │   │   ├── __init__.py
    │   │   ├── layer_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── post_layer_infer.py
    │   │   │   ├── pre_layer_infer.py
    │   │   │   └── transformer_layer_infer.py
    │   │   ├── layer_weights
    │   │   │   ├── __init__.py
    │   │   │   ├── hf_load_utils.py
    │   │   │   ├── pre_and_post_layer_weight.py
    │   │   │   └── transformer_layer_weight.py
    │   │   ├── model.py
    │   │   └── triton_kernel
    │   │   │   ├── __init__.py
    │   │   │   ├── flashattention_nopad.py
    │   │   │   ├── gelu_vit.py
    │   │   │   └── rms_norm_vit.py
    │   └── whisper
    │   │   ├── __init__.py
    │   │   ├── defaults.py
    │   │   ├── modeling_whisper.py
    │   │   └── whisper_audio.py
    ├── server
    │   ├── __init__.py
    │   ├── api_cli.py
    │   ├── api_http.py
    │   ├── api_lightllm.py
    │   ├── api_models.py
    │   ├── api_openai.py
    │   ├── api_server.py
    │   ├── api_start.py
    │   ├── api_tgi.py
    │   ├── audioserver
    │   │   ├── __init__.py
    │   │   ├── manager.py
    │   │   └── model_infer
    │   │   │   ├── __init__.py
    │   │   │   └── model_rpc.py
    │   ├── build_prompt.py
    │   ├── config_server
    │   │   ├── __init__.py
    │   │   ├── api_http.py
    │   │   └── nccl_tcp_store.py
    │   ├── core
    │   │   ├── __init__.py
    │   │   └── objs
    │   │   │   ├── __init__.py
    │   │   │   ├── atomic_array_lock.py
    │   │   │   ├── atomic_lock.py
    │   │   │   ├── io_objs
    │   │   │       ├── __init__.py
    │   │   │       └── group_req.py
    │   │   │   ├── nixl_params.py
    │   │   │   ├── out_token_circlequeue.py
    │   │   │   ├── py_sampling_params.py
    │   │   │   ├── req.py
    │   │   │   ├── rpc_shm.py
    │   │   │   ├── sampling_params.py
    │   │   │   ├── shm_array.py
    │   │   │   ├── shm_objs_io_buffer.py
    │   │   │   ├── shm_req_manager.py
    │   │   │   ├── start_args_type.py
    │   │   │   └── token_chunck_hash_list.py
    │   ├── detokenization
    │   │   ├── __init__.py
    │   │   ├── decode.py
    │   │   ├── decode_mode_fix.py
    │   │   ├── decode_req.py
    │   │   └── manager.py
    │   ├── embed_cache
    │   │   ├── __init__.py
    │   │   ├── impl
    │   │   │   ├── __init__.py
    │   │   │   └── naive_memory_cache.py
    │   │   ├── manager.py
    │   │   └── utils.py
    │   ├── function_call_parser.py
    │   ├── health_monitor
    │   │   ├── __init__.py
    │   │   └── manager.py
    │   ├── httpserver
    │   │   ├── __init__.py
    │   │   ├── async_queue.py
    │   │   ├── manager.py
    │   │   └── pd_loop.py
    │   ├── httpserver_for_pd_master
    │   │   ├── __init__.py
    │   │   ├── manager.py
    │   │   ├── pd_selector
    │   │   │   ├── __init__.py
    │   │   │   └── pd_selector.py
    │   │   └── register_loop.py
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── manager.py
    │   │   └── metrics.py
    │   ├── multi_level_kv_cache
    │   │   ├── __init__.py
    │   │   ├── cpu_cache_client.py
    │   │   ├── disk_cache_worker.py
    │   │   ├── manager.py
    │   │   └── shm_objs.py
    │   ├── multimodal_params.py
    │   ├── pd_io_struct.py
    │   ├── req_id_generator.py
    │   ├── router
    │   │   ├── __init__.py
    │   │   ├── batch.py
    │   │   ├── dynamic_prompt
    │   │   │   ├── __init__.py
    │   │   │   ├── radix_cache.py
    │   │   │   └── shared_arr.py
    │   │   ├── manager.py
    │   │   ├── model_infer
    │   │   │   ├── __init__.py
    │   │   │   ├── infer_batch.py
    │   │   │   ├── mode_backend
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── base_backend.py
    │   │   │   │   ├── chunked_prefill
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── control_state.py
    │   │   │   │   │   ├── impl.py
    │   │   │   │   │   ├── impl_for_first_token_constraint_mode.py
    │   │   │   │   │   ├── impl_for_outlines_constraint_mode.py
    │   │   │   │   │   ├── impl_for_return_all_prompt_logprobs.py
    │   │   │   │   │   ├── impl_for_reward_model.py
    │   │   │   │   │   ├── impl_for_token_healing.py
    │   │   │   │   │   └── impl_for_xgrammar_mode.py
    │   │   │   │   ├── continues_batch
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   └── pd_mode
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── decode_node_impl
    │   │   │   │   │   │       ├── __init__.py
    │   │   │   │   │   │       ├── decode_impl.py
    │   │   │   │   │   │       ├── decode_impl_for_dp.py
    │   │   │   │   │   │       ├── decode_infer_rpyc.py
    │   │   │   │   │   │       ├── decode_kv_move_manager.py
    │   │   │   │   │   │       ├── decode_task_cache.py
    │   │   │   │   │   │       ├── decode_trans_obj.py
    │   │   │   │   │   │       ├── decode_trans_process.py
    │   │   │   │   │   │       └── up_status.py
    │   │   │   │   │   │   ├── p2p_fix.py
    │   │   │   │   │   │   ├── prefill_node_impl
    │   │   │   │   │   │       ├── __init__.py
    │   │   │   │   │   │       ├── prefill_impl.py
    │   │   │   │   │   │       ├── prefill_impl_for_dp.py
    │   │   │   │   │   │       ├── prefill_infer_rpyc.py
    │   │   │   │   │   │       ├── prefill_kv_move_manager.py
    │   │   │   │   │   │       ├── prefill_task_cache.py
    │   │   │   │   │   │       ├── prefill_trans_obj.py
    │   │   │   │   │   │       └── prefill_trans_process.py
    │   │   │   │   │   │   ├── task_queue.py
    │   │   │   │   │   │   └── utils.py
    │   │   │   │   ├── diverse_backend
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   └── impl.py
    │   │   │   │   ├── dp_backend
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── control_state.py
    │   │   │   │   │   ├── dp_shared_kv_trans.py
    │   │   │   │   │   └── impl.py
    │   │   │   │   ├── generic_padded_pre_process.py
    │   │   │   │   ├── generic_post_process.py
    │   │   │   │   ├── generic_pre_process.py
    │   │   │   │   ├── mtp_pre_process.py
    │   │   │   │   ├── multi_level_kv_cache.py
    │   │   │   │   ├── overlap_events.py
    │   │   │   │   ├── pd_nixl
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── base_kv_move_manager.py
    │   │   │   │   │   ├── decode_node_impl
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── decode_impl.py
    │   │   │   │   │   │   ├── decode_impl_for_dp.py
    │   │   │   │   │   │   ├── decode_kv_move_manager.py
    │   │   │   │   │   │   ├── decode_trans_process.py
    │   │   │   │   │   │   └── up_status.py
    │   │   │   │   │   ├── nixl_kv_transporter.py
    │   │   │   │   │   ├── prefill_node_impl
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── prefill_impl.py
    │   │   │   │   │   │   ├── prefill_impl_for_dp.py
    │   │   │   │   │   │   ├── prefill_kv_move_manager.py
    │   │   │   │   │   │   └── prefill_trans_process.py
    │   │   │   │   │   └── trans_process_obj.py
    │   │   │   │   ├── pre.py
    │   │   │   │   └── redundancy_expert_manager.py
    │   │   │   ├── model_rpc.py
    │   │   │   └── pin_mem_manager.py
    │   │   ├── req_queue
    │   │   │   ├── __init__.py
    │   │   │   ├── base_queue.py
    │   │   │   ├── chunked_prefill
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── beam_impl.py
    │   │   │   │   ├── impl.py
    │   │   │   │   ├── impl_for_nixl_pd.py
    │   │   │   │   └── impl_for_pd_decode.py
    │   │   │   ├── dp_balancer
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── base.py
    │   │   │   │   ├── bs.py
    │   │   │   │   └── roundrobin.py
    │   │   │   └── dp_base_queue.py
    │   │   ├── stats.py
    │   │   └── token_load.py
    │   ├── tokenizer.py
    │   └── visualserver
    │   │   ├── __init__.py
    │   │   ├── manager.py
    │   │   └── model_infer
    │   │       ├── __init__.py
    │   │       └── model_rpc.py
    └── utils
    │   ├── __init__.py
    │   ├── auto_shm_cleanup.py
    │   ├── config_utils.py
    │   ├── custom_kernel_utis.py
    │   ├── device_utils.py
    │   ├── dist_utils.py
    │   ├── envs_utils.py
    │   ├── error_utils.py
    │   ├── graceful_utils.py
    │   ├── health_check.py
    │   ├── infer_utils.py
    │   ├── kv_cache_utils.py
    │   ├── light_utils.py
    │   ├── llm_utils.py
    │   ├── log_utils.py
    │   ├── multimodal_utils.py
    │   ├── multinode_utils.py
    │   ├── net_utils.py
    │   ├── petrel_helper.py
    │   ├── process_check.py
    │   ├── profile_max_tokens.py
    │   ├── retry_utils.py
    │   ├── rpyc_fix_utils.py
    │   ├── sgl_utils.py
    │   ├── shm_size_check.py
    │   ├── shm_utils.py
    │   ├── start_utils.py
    │   ├── statics_utils.py
    │   ├── time_utils.py
    │   ├── torch_ops_utils.py
    │   ├── tuning_utils.py
    │   ├── vllm_utils.py
    │   └── watchdog_utils.py
├── requirements.txt
├── setup.py
├── test
    ├── advanced_config
    │   ├── fp8_calibration_per_head
    │   │   ├── test_kv_cache_calib_per_head_qwen2.5_14b.json
    │   │   ├── test_kv_cache_calib_per_head_qwen2.5_32b.json
    │   │   ├── test_kv_cache_calib_per_head_qwen2.5_72b.json
    │   │   ├── test_kv_cache_calib_per_head_qwen3_235b.json
    │   │   └── test_kv_cache_calib_per_head_qwen3_30b.json
    │   ├── fp8_calibration_per_tensor
    │   │   ├── test_kv_cache_calib_per_tensor_qwen2.5_14b.json
    │   │   ├── test_kv_cache_calib_per_tensor_qwen2.5_32b.json
    │   │   ├── test_kv_cache_calib_per_tensor_qwen2.5_72b.json
    │   │   ├── test_kv_cache_calib_per_tensor_qwen3_235b.json
    │   │   └── test_kv_cache_calib_per_tensor_qwen3_30b.json
    │   ├── mixed_quantization
    │   │   └── llamacls-mix-down.yaml
    │   └── redundancy_expert
    │   │   └── test_redundancy_expert_config.json
    ├── benchmark
    │   ├── kernel
    │   │   └── benchmark_fused_moe_triton.py
    │   ├── service
    │   │   ├── benchmark_client.py
    │   │   ├── benchmark_mcq.py
    │   │   ├── benchmark_prompt_cache.py
    │   │   ├── benchmark_prompt_cache_multi_server.py
    │   │   ├── benchmark_qps.py
    │   │   └── benchmark_sharegpt.py
    │   └── static_inference
    │   │   ├── model_infer.py
    │   │   ├── model_infer_mtp.py
    │   │   ├── profile_demo.py
    │   │   ├── test_model.py
    │   │   └── test_vit.py
    ├── chat_template
    │   ├── tool_chat_template_deepseekr1.jinja
    │   ├── tool_chat_template_deepseekv3.jinjia
    │   ├── tool_chat_template_deepseekv31.jinja
    │   └── tool_chat_template_deepseekv32.jinjia
    ├── compare_with_previous_commit.py
    ├── format_out
    │   ├── gomoku_game.py
    │   ├── qabot.py
    │   ├── test_constraint_server.py
    │   ├── test_demo.py
    │   └── test_xgrammar_constraint.py
    ├── kernel
    │   ├── deepseekv2_bmm_scaled_fp8_tuning.py
    │   ├── deepseekv2_gqa_decode_tuning.py
    │   ├── deepseekv3_fp8_block_gemm_tuning.py
    │   ├── deepseekv3_rotary_emb_tuning.py
    │   ├── fuse_moe_tuning.py
    │   ├── llama_gqa_decode_vsm_tuning.py
    │   ├── llama_gqa_diverse_decode_stage1_tuning.py
    │   ├── moe_silu_and_mul_tuning_bf16.py
    │   └── moe_sum_reduce_tuning_bf16.py
    ├── start_scripts
    │   ├── README.md
    │   ├── draft.sh
    │   ├── multi_node_ep_node0.sh
    │   ├── multi_node_ep_node1.sh
    │   ├── multi_node_tp_node0.sh
    │   ├── multi_node_tp_node1.sh
    │   ├── multi_pd_master.sh
    │   ├── multi_pd_master
    │   │   ├── config_server.sh
    │   │   ├── pd_decode.sh
    │   │   ├── pd_master_1.sh
    │   │   ├── pd_master_2.sh
    │   │   └── pd_prefill.sh
    │   ├── single_node_ep.sh
    │   ├── single_node_tp.sh
    │   ├── single_node_tp_cpu_cache_enable.sh
    │   └── single_pd_master
    │   │   ├── pd_decode.sh
    │   │   ├── pd_master.sh
    │   │   ├── pd_nixl_decode.sh
    │   │   ├── pd_nixl_prefill.sh
    │   │   └── pd_prefill.sh
    └── test_api
    │   ├── test.jpg
    │   ├── test_generate_api.py
    │   ├── test_multimodal_api.py
    │   └── test_openai_api.py
├── tools
    ├── quick_launch_docker.py
    └── resolve_ptx_version
└── unit_tests
    ├── common
        ├── basemodel
        │   └── triton_kernel
        │   │   ├── test_add_in_place.py
        │   │   ├── test_gen_decode_params.py
        │   │   ├── test_gen_mtp_prefill_params.py
        │   │   ├── test_gen_prefill_params.py
        │   │   ├── test_gen_sampling_params.py
        │   │   ├── test_multimodal_emb.py
        │   │   ├── test_redundancy_topk_ids_repair.py
        │   │   └── test_sp_pad_kernel.py
        ├── fused_moe
        │   ├── test_deepep.py
        │   ├── test_grouped_fused_moe.py
        │   ├── test_grouped_fused_moe_speed.py
        │   ├── test_grouped_topk.py
        │   ├── test_moe_silu_and_mul_mix_quant_ep.py
        │   └── test_softmax_topk.py
        ├── kv_trans_kernel
        │   ├── test_kv_trans_v2.py
        │   └── test_nixl_kv_trans.py
        └── quantization
        │   └── test_fp8_scaled_mm_per_token.py
    ├── models
        ├── deepseek2
        │   ├── test_destindex_copy_kv.py
        │   ├── test_destindex_copy_kv_fp8.py
        │   ├── test_gqa_flash_decoding.py
        │   ├── test_gqa_flash_decoding_fp8.py
        │   ├── test_repack_kv_index.py
        │   └── test_rope_repeat.py
        ├── llama
        │   ├── test_context_flashattention_nopad.py
        │   ├── test_context_flashattention_nopad_fa3_fp8.py
        │   ├── test_context_flashattention_nopad_flashinfer_fp8.py
        │   ├── test_ppl_int8kv_flash_decoding_diverse.py
        │   ├── test_ppl_int8kv_flash_decoding_diverse_stage1.py
        │   ├── test_ppl_int8kv_flash_decoding_diverse_stage2.py
        │   ├── test_ppl_int8kv_flash_decoding_diverse_stage3.py
        │   ├── test_token_attention_nopad.py
        │   ├── test_token_attention_nopad_fa3_fp8.py
        │   └── test_token_attention_nopad_flashinfer_fp8.py
        ├── qwen2_vl
        │   ├── test_mrope.py
        │   └── test_rotary_pos_emb.py
        └── vit
        │   └── test_flash_attention_forward.py
    ├── server
        ├── core
        │   └── objs
        │   │   ├── test_atomic_array_lock.py
        │   │   ├── test_atomic_lock.py
        │   │   ├── test_out_token_circlequeue.py
        │   │   ├── test_req.py
        │   │   ├── test_sampling_params.py
        │   │   ├── test_shm_array.py
        │   │   └── test_shm_req_manager.py
        └── router
        │   └── dynamic_prompt
        │       └── test_radix_cache.py
    └── utils
        └── test_custom_kernel_utils.py


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.github/ISSUE_TEMPLATE/bug_report.md


--------------------------------------------------------------------------------
/.github/workflows/docker-publish.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.github/workflows/docker-publish.yml


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.github/workflows/pre-commit.yml


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .pyc
3 | build
4 | dist
5 | *.egg-info
6 | .idea
7 | .vscode
8 | tmp/
9 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.pre-commit-config.yaml


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/CONTRIBUTING.md


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/README.md


--------------------------------------------------------------------------------
/assets/att.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/att.gif


--------------------------------------------------------------------------------
/assets/lightllm.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/lightllm.drawio.png


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/logo.png


--------------------------------------------------------------------------------
/assets/logo_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/logo_new.png


--------------------------------------------------------------------------------
/benchmark.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/benchmark.md


--------------------------------------------------------------------------------
/demos/qa_server/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/demos/qa_server/chat_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/demos/qa_server/chat_server.py


--------------------------------------------------------------------------------
/demos/qa_server/qabot.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/demos/qa_server/qabot.py


--------------------------------------------------------------------------------
/demos/qa_server/templates/chat.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/demos/qa_server/templates/chat.html


--------------------------------------------------------------------------------
/demos/readme.txt:
--------------------------------------------------------------------------------
1 | 一些应用demo的目录


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile


--------------------------------------------------------------------------------
/docker/Dockerfile.deepep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile.deepep


--------------------------------------------------------------------------------
/docker/Dockerfile.nixl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile.nixl


--------------------------------------------------------------------------------
/docker/Dockerfile.nixl.deepep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile.nixl.deepep


--------------------------------------------------------------------------------
/docker/cuda_version_12.6.1/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile


--------------------------------------------------------------------------------
/docker/cuda_version_12.6.1/Dockerfile.deepep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile.deepep


--------------------------------------------------------------------------------
/docker/cuda_version_12.6.1/Dockerfile.nixl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile.nixl


--------------------------------------------------------------------------------
/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep


--------------------------------------------------------------------------------
/docker/cuda_version_12.8.0/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile


--------------------------------------------------------------------------------
/docker/cuda_version_12.8.0/Dockerfile.deepep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.deepep


--------------------------------------------------------------------------------
/docker/cuda_version_12.8.0/Dockerfile.nixl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.nixl


--------------------------------------------------------------------------------
/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep


--------------------------------------------------------------------------------
/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache


--------------------------------------------------------------------------------
/docs/CN/.readthedocs.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/.readthedocs.yaml


--------------------------------------------------------------------------------
/docs/CN/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/Makefile


--------------------------------------------------------------------------------
/docs/CN/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/README.md


--------------------------------------------------------------------------------
/docs/CN/make.bat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/make.bat


--------------------------------------------------------------------------------
/docs/CN/rebuild.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/rebuild.sh


--------------------------------------------------------------------------------
/docs/CN/requirements-docs.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/requirements-docs.txt


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/ER1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER1.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/ER2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER2.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/ER3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER3.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/ER4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER4.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/HttpServer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/HttpServer.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/Performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Performance.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/Performance2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Performance2.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/Router.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Router.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/Visual_Server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Visual_Server.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/arch.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/backend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/backend.png


--------------------------------------------------------------------------------
/docs/CN/source/assets/lightllm/token_attn.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/token_attn.gif


--------------------------------------------------------------------------------
/docs/CN/source/assets/logos/lightllm-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/logos/lightllm-logo.png


--------------------------------------------------------------------------------
/docs/CN/source/conf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/conf.py


--------------------------------------------------------------------------------
/docs/CN/source/framework/framework.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/framework/framework.rst


--------------------------------------------------------------------------------
/docs/CN/source/framework/router.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/framework/router.rst


--------------------------------------------------------------------------------
/docs/CN/source/framework/token_attention.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/framework/token_attention.rst


--------------------------------------------------------------------------------
/docs/CN/source/getting_started/benchmark.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/getting_started/benchmark.rst


--------------------------------------------------------------------------------
/docs/CN/source/getting_started/installation.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/getting_started/installation.rst


--------------------------------------------------------------------------------
/docs/CN/source/getting_started/quickstart.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/getting_started/quickstart.rst


--------------------------------------------------------------------------------
/docs/CN/source/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/index.rst


--------------------------------------------------------------------------------
/docs/CN/source/models/add_new_model.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/models/add_new_model.md


--------------------------------------------------------------------------------
/docs/CN/source/models/supported_models.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/models/supported_models.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/api_param.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/api_param.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/api_server_args_zh.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/api_server_args_zh.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/deepseek_deployment.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/deepseek_deployment.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/multi_level_cache_deployment.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/multi_level_cache_deployment.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/multimodal.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/multimodal.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/openai.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/openai.rst


--------------------------------------------------------------------------------
/docs/CN/source/tutorial/reward_model.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/reward_model.rst


--------------------------------------------------------------------------------
/docs/EN/.readthedocs.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/.readthedocs.yaml


--------------------------------------------------------------------------------
/docs/EN/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/Makefile


--------------------------------------------------------------------------------
/docs/EN/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/README.md


--------------------------------------------------------------------------------
/docs/EN/make.bat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/make.bat


--------------------------------------------------------------------------------
/docs/EN/rebuild.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/rebuild.sh


--------------------------------------------------------------------------------
/docs/EN/requirements-docs.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/requirements-docs.txt


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/ER1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER1.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/ER2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER2.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/ER3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER3.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/ER4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER4.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/HttpServer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/HttpServer.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/Performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Performance.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/Performance2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Performance2.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/Router.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Router.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/Visual_Server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Visual_Server.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/arch.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/backend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/backend.png


--------------------------------------------------------------------------------
/docs/EN/source/assets/lightllm/token_attn.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/token_attn.gif


--------------------------------------------------------------------------------
/docs/EN/source/assets/logos/lightllm-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/logos/lightllm-logo.png


--------------------------------------------------------------------------------
/docs/EN/source/conf.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/conf.py


--------------------------------------------------------------------------------
/docs/EN/source/framework/framework.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/framework/framework.rst


--------------------------------------------------------------------------------
/docs/EN/source/framework/router.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/framework/router.rst


--------------------------------------------------------------------------------
/docs/EN/source/framework/token_attention.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/framework/token_attention.rst


--------------------------------------------------------------------------------
/docs/EN/source/getting_started/benchmark.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/getting_started/benchmark.rst


--------------------------------------------------------------------------------
/docs/EN/source/getting_started/installation.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/getting_started/installation.rst


--------------------------------------------------------------------------------
/docs/EN/source/getting_started/quickstart.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/getting_started/quickstart.rst


--------------------------------------------------------------------------------
/docs/EN/source/index.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/index.rst


--------------------------------------------------------------------------------
/docs/EN/source/models/add_new_model.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/models/add_new_model.md


--------------------------------------------------------------------------------
/docs/EN/source/models/supported_models.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/models/supported_models.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/api_param.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/api_param.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/api_server_args_zh.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/api_server_args_zh.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/deepseek_deployment.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/deepseek_deployment.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/multi_level_cache_deployment.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/multi_level_cache_deployment.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/multimodal.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/multimodal.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/openai.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/openai.rst


--------------------------------------------------------------------------------
/docs/EN/source/tutorial/reward_model.rst:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/reward_model.rst


--------------------------------------------------------------------------------
/format.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format.py


--------------------------------------------------------------------------------
/format_out/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/format_out/grammer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/__init__.py


--------------------------------------------------------------------------------
/format_out/grammer/core.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/core.py


--------------------------------------------------------------------------------
/format_out/grammer/dpda.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/dpda.py


--------------------------------------------------------------------------------
/format_out/grammer/json.ebnf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/json.ebnf


--------------------------------------------------------------------------------
/format_out/grammer/test.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test.sh


--------------------------------------------------------------------------------
/format_out/grammer/test0.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test0.py


--------------------------------------------------------------------------------
/format_out/grammer/test1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test1.py


--------------------------------------------------------------------------------
/format_out/grammer/test2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test2.py


--------------------------------------------------------------------------------
/format_out/grammer/test3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test3.py


--------------------------------------------------------------------------------
/format_out/grammer/test4.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test4.py


--------------------------------------------------------------------------------
/format_out/grammer/test5.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test5.py


--------------------------------------------------------------------------------
/format_out/grammer/test6.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test6.py


--------------------------------------------------------------------------------
/format_out/impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/impl.py


--------------------------------------------------------------------------------
/lightllm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/all_kernel_configs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/basemodel/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/__init__.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/basemodel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/basemodel.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/batch_objs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/batch_objs.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/cuda_graph.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/cuda_graph.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/infer_lock.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/infer_lock.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/base_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/base_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/cache_tensor_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/cache_tensor_manager.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/template/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/template/post_layer_infer_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/post_layer_infer_template.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/template/pre_layer_infer_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/pre_layer_infer_template.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_cohere_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_cohere_template.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_template.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_template.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/base_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/base_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/hf_load_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/hf_load_utils.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/__init__.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/base_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/base_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep_redundancy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep_redundancy.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/gpt_oss_fused_moe_weight_tp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/gpt_oss_fused_moe_weight_tp.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/__init__.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/colmm_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/colmm_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_factory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_factory.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_slicer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_slicer.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/rowmm_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/rowmm_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/meta_weights/norm_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/norm_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/multimodal_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/multimodal_tokenizer.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/add_in_place.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/add_in_place.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/apply_penalty.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/apply_penalty.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/apply_penalty_gpu_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/apply_penalty_gpu_cache.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/bmm_scaled_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/bmm_scaled_fp8.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/copy_kv_index_to_req.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/copy_kv_index_to_req.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/dequantize_gemm_int4.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/dequantize_gemm_int4.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/dequantize_gemm_int8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/dequantize_gemm_int8.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/destindex_copy_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/destindex_copy_kv.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/destindex_copy_kv_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/destindex_copy_kv_fp8.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/fa3_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/fa3_utils.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/gather_token_id.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gather_token_id.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/gen_decode_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_decode_params.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/gen_mtp_prefill_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_mtp_prefill_params.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/gen_prefill_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_prefill_params.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/gen_sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_sampling_params.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/kv_cache_offload.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/kv_cache_offload.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/mtp_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/mtp_utils.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/multimodal_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/multimodal_emb.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/q_per_head_fp8_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/q_per_head_fp8_quant.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/quantize_gemm_int8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/quantize_gemm_int8.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/redundancy_topk_ids_repair.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/redundancy_topk_ids_repair.py


--------------------------------------------------------------------------------
/lightllm/common/basemodel/triton_kernel/sp_pad_copy.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/sp_pad_copy.py


--------------------------------------------------------------------------------
/lightllm/common/build_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/build_utils.py


--------------------------------------------------------------------------------
/lightllm/common/cuda_wrapper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/cuda_wrapper.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/deepep_scatter_gather.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/deepep_scatter_gather.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/grouped_fused_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/grouped_fused_moe.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/grouped_fused_moe_ep.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/grouped_fused_moe_ep.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/grouped_topk.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/grouped_topk.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/moe_kernel_configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_kernel_configs.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/moe_silu_and_mul.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_silu_and_mul.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/moe_silu_and_mul_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_silu_and_mul_config.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/moe_silu_and_mul_mix_quant_ep.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_silu_and_mul_mix_quant_ep.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/moe_sum_recude_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_sum_recude_config.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/moe_sum_reduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_sum_reduce.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/softmax_topk.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/softmax_topk.py


--------------------------------------------------------------------------------
/lightllm/common/fused_moe/topk_select.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/topk_select.py


--------------------------------------------------------------------------------
/lightllm/common/infer_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/infer_utils.py


--------------------------------------------------------------------------------
/lightllm/common/kernel_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kernel_config.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/__init__.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/deepseek2_fp8kv_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/deepseek2_fp8kv_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/deepseek2_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/deepseek2_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/int8kv_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/int8kv_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/mem_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/mem_utils.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/ppl_int4kv_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/ppl_int4kv_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_cache_mem_manager/ppl_int8kv_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/ppl_int8kv_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/common/kv_trans_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/kv_trans_kernel/kv_trans.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_trans_kernel/kv_trans.py


--------------------------------------------------------------------------------
/lightllm/common/kv_trans_kernel/kv_trans_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_trans_kernel/kv_trans_v2.py


--------------------------------------------------------------------------------
/lightllm/common/kv_trans_kernel/nixl_kv_trans.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_trans_kernel/nixl_kv_trans.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/__init__.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/awq_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/awq_quant.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/deepgemm_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/deepgemm_quant.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/quantize_method.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/quantize_method.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/registry.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/torchao_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/torchao_quant.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/fp8/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/fp8/fp8act_quant_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8act_quant_kernel.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_gemm_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_gemm_kernel.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_quant_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_quant_kernel.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_scaled_mm_per_token_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_scaled_mm_per_token_kernel.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/triton_quant/triton_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/triton_quant.py


--------------------------------------------------------------------------------
/lightllm/common/quantization/w8a8_quant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/w8a8_quant.py


--------------------------------------------------------------------------------
/lightllm/common/req_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/req_manager.py


--------------------------------------------------------------------------------
/lightllm/common/triton_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/common/triton_utils/autotuner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/triton_utils/autotuner.py


--------------------------------------------------------------------------------
/lightllm/distributed/__init__.py:
--------------------------------------------------------------------------------
1 | from .communication_op import *
2 | 


--------------------------------------------------------------------------------
/lightllm/distributed/communication_op.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/communication_op.py


--------------------------------------------------------------------------------
/lightllm/distributed/custom_all_gather.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/custom_all_gather.py


--------------------------------------------------------------------------------
/lightllm/distributed/custom_all_reduce.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/custom_all_reduce.py


--------------------------------------------------------------------------------
/lightllm/distributed/pynccl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/pynccl.py


--------------------------------------------------------------------------------
/lightllm/distributed/pynccl_wrapper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/pynccl_wrapper.py


--------------------------------------------------------------------------------
/lightllm/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/__init__.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_weights/hf_load_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_weights/hf_load_utils.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/model.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/context_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/context_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/layernorm.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/token_attention_nopad_att1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_attention_nopad_att1.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/token_attention_nopad_reduceV.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_attention_nopad_reduceV.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/token_attention_nopad_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_attention_nopad_softmax.py


--------------------------------------------------------------------------------
/lightllm/models/bloom/triton_kernel/token_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/model.py


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/chatglm2/triton_kernel/rotary_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/triton_kernel/rotary_emb.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/cohere/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/cohere/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/cohere/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/model.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/triton_kernels/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/cohere/triton_kernels/layernorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/triton_kernels/layernorm.py


--------------------------------------------------------------------------------
/lightllm/models/cohere/triton_kernels/rotary_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/triton_kernels/rotary_emb.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/flashattention_infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/flashattention_infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/flashinfer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/flashinfer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/model.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_fp8.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_with_v.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_with_v.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/destindex_copy_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/destindex_copy_kv.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/destindex_copy_kv_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/destindex_copy_kv_fp8.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_config.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_fp8.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1_fp8.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage2.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/repack_kv_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/repack_kv_index.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/repeat_rope.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/repeat_rope.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/rotary_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/rotary_emb.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/rotary_emb_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/rotary_emb_config.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/sample_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/sample_kv.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek2/triton_kernel/weight_dequant.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/weight_dequant.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek_mtp/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek_mtp/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek_mtp/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek_mtp/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek_mtp/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/deepseek_mtp/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek_mtp/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/deepseek_mtp/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek_mtp/model.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma3/gemma3_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/gemma3_visual.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/gemma3/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/model.py


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/model.py


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gemma_2b/triton_kernel/gelu_and_mul.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/triton_kernel/gelu_and_mul.py


--------------------------------------------------------------------------------
/lightllm/models/gpt_oss/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gpt_oss/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/gpt_oss/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/gpt_oss/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gpt_oss/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/gpt_oss/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gpt_oss/model.py


--------------------------------------------------------------------------------
/lightllm/models/internlm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/internlm/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm/model.py


--------------------------------------------------------------------------------
/lightllm/models/internlm2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm2/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm2/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/internlm2/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/internlm2/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2/model.py


--------------------------------------------------------------------------------
/lightllm/models/internlm2_reward/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm2_reward/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm2_reward/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2_reward/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/internlm2_reward/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internlm2_reward/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2_reward/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/internlm2_reward/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2_reward/model.py


--------------------------------------------------------------------------------
/lightllm/models/internvl/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internvl/img_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/img_process.py


--------------------------------------------------------------------------------
/lightllm/models/internvl/internvl_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/internvl_visual.py


--------------------------------------------------------------------------------
/lightllm/models/internvl/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/internvl/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/internvl/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/model.py


--------------------------------------------------------------------------------
/lightllm/models/llama/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/llama/flashattention_infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/flashattention_infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/llama/flashinfer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/flashinfer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/llama/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_weights/ds_load_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_weights/ds_load_utils.py


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/llama/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/llama/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/model.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/embedding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/embedding.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/flash_decoding_stage1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/flash_decoding_stage1.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/flash_decoding_stage2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/flash_decoding_stage2.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/gqa_decode_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_decode_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/gqa_flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage1.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage2.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/gqa_flash_decoding_vsm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding_vsm.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_fp16_flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_fp16_flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_int4kv_copy_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int4kv_copy_kv.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_int4kv_flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int4kv_flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage1.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage3.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/ppl_quant_copy_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_quant_copy_kv.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/rmsnorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/rmsnorm.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/rotary_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/rotary_emb.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/silu_and_mul.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/silu_and_mul.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/token_attention_nopad_att1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_nopad_att1.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/token_attention_nopad_reduceV.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_nopad_reduceV.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/token_attention_nopad_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_nopad_softmax.py


--------------------------------------------------------------------------------
/lightllm/models/llama/triton_kernel/token_attention_softmax_and_reducev.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_softmax_and_reducev.py


--------------------------------------------------------------------------------
/lightllm/models/llama/yarn_rotary_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/yarn_rotary_utils.py


--------------------------------------------------------------------------------
/lightllm/models/llava/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/llava/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/llava/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llava/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/llava/llava_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llava/llava_visual.py


--------------------------------------------------------------------------------
/lightllm/models/llava/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llava/model.py


--------------------------------------------------------------------------------
/lightllm/models/minicpm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/minicpm/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/minicpm/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/minicpm/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/minicpm/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/minicpm/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/minicpm/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/minicpm/model.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/mistral/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/mistral/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/model.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/mistral/triton_kernel/context_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/context_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/triton_kernel/init_att_sliding_window_info.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/init_att_sliding_window_info.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/triton_kernel/token_attention_nopad_att1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/token_attention_nopad_att1.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/triton_kernel/token_attention_nopad_reduceV.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/token_attention_nopad_reduceV.py


--------------------------------------------------------------------------------
/lightllm/models/mistral/triton_kernel/token_attention_softmax_and_reducev.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/token_attention_softmax_and_reducev.py


--------------------------------------------------------------------------------
/lightllm/models/mixtral/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/mixtral/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/mixtral/layer_infer/_custom_ops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/layer_infer/_custom_ops.py


--------------------------------------------------------------------------------
/lightllm/models/mixtral/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/mixtral/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/mixtral/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/mixtral/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/model.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/phi3/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/phi3/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/phi3/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/model.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/context_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/context_flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/destindex_copy_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/destindex_copy_kv.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/flash_decoding.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/flash_decoding_stage1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/flash_decoding_stage1.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/flash_decoding_stage2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/flash_decoding_stage2.py


--------------------------------------------------------------------------------
/lightllm/models/phi3/triton_kernel/rotary_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/rotary_emb.py


--------------------------------------------------------------------------------
/lightllm/models/qwen/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/qwen/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/qwen/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen2/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen2/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_5_vl/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen2_5_vl/qwen2_5_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_5_vl/qwen2_5_visual.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_reward/__init__.py:
--------------------------------------------------------------------------------
1 |  
2 |   


--------------------------------------------------------------------------------
/lightllm/models/qwen2_reward/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 |    


--------------------------------------------------------------------------------
/lightllm/models/qwen2_reward/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_reward/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_reward/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 |    


--------------------------------------------------------------------------------
/lightllm/models/qwen2_reward/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_reward/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_reward/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_reward/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/flashattention_infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/flashattention_infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/infer_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/infer_struct.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/qwen2_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/qwen2_visual.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/triton_kernel/mrope.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/triton_kernel/mrope.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/triton_kernel/rotary_pos_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/triton_kernel/rotary_pos_emb.py


--------------------------------------------------------------------------------
/lightllm/models/qwen2_vl/vision_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/vision_process.py


--------------------------------------------------------------------------------
/lightllm/models/qwen3/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen3/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen3/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/qwen3/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen3/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen3/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen3_moe/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen3_moe/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen3_moe/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3_moe/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/qwen3_moe/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen3_moe/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3_moe/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/qwen3_moe/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3_moe/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen_vl/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen_vl/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/qwen_vl/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen_vl/model.py


--------------------------------------------------------------------------------
/lightllm/models/qwen_vl/qwen_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen_vl/qwen_visual.py


--------------------------------------------------------------------------------
/lightllm/models/registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/registry.py


--------------------------------------------------------------------------------
/lightllm/models/stablelm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/stablelm/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/stablelm/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/stablelm/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/stablelm/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/stablelm/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/stablelm/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/model.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/starcoder/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/starcoder/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/starcoder/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/model.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/starcoder2/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/model.py


--------------------------------------------------------------------------------
/lightllm/models/tarsier2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/tarsier2/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/tarsier2/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/tarsier2/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/tarsier2/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/tarsier2/model.py


--------------------------------------------------------------------------------
/lightllm/models/tarsier2/tarsier2_visual.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/tarsier2/tarsier2_visual.py


--------------------------------------------------------------------------------
/lightllm/models/vit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/__init__.py


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_infer/post_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_infer/post_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_infer/pre_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_infer/pre_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_infer/transformer_layer_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_infer/transformer_layer_infer.py


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_weights/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_weights/hf_load_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_weights/hf_load_utils.py


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_weights/pre_and_post_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_weights/pre_and_post_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/vit/layer_weights/transformer_layer_weight.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_weights/transformer_layer_weight.py


--------------------------------------------------------------------------------
/lightllm/models/vit/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/model.py


--------------------------------------------------------------------------------
/lightllm/models/vit/triton_kernel/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/vit/triton_kernel/flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/triton_kernel/flashattention_nopad.py


--------------------------------------------------------------------------------
/lightllm/models/vit/triton_kernel/gelu_vit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/triton_kernel/gelu_vit.py


--------------------------------------------------------------------------------
/lightllm/models/vit/triton_kernel/rms_norm_vit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/triton_kernel/rms_norm_vit.py


--------------------------------------------------------------------------------
/lightllm/models/whisper/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/models/whisper/defaults.py:
--------------------------------------------------------------------------------
1 | MIN_AUDIO_LEN = 480  # 最短音频长度
2 | 


--------------------------------------------------------------------------------
/lightllm/models/whisper/modeling_whisper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/whisper/modeling_whisper.py


--------------------------------------------------------------------------------
/lightllm/models/whisper/whisper_audio.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/whisper/whisper_audio.py


--------------------------------------------------------------------------------
/lightllm/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/api_cli.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_cli.py


--------------------------------------------------------------------------------
/lightllm/server/api_http.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_http.py


--------------------------------------------------------------------------------
/lightllm/server/api_lightllm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_lightllm.py


--------------------------------------------------------------------------------
/lightllm/server/api_models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_models.py


--------------------------------------------------------------------------------
/lightllm/server/api_openai.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_openai.py


--------------------------------------------------------------------------------
/lightllm/server/api_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_server.py


--------------------------------------------------------------------------------
/lightllm/server/api_start.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_start.py


--------------------------------------------------------------------------------
/lightllm/server/api_tgi.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_tgi.py


--------------------------------------------------------------------------------
/lightllm/server/audioserver/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/audioserver/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/audioserver/manager.py


--------------------------------------------------------------------------------
/lightllm/server/audioserver/model_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/audioserver/model_infer/model_rpc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/audioserver/model_infer/model_rpc.py


--------------------------------------------------------------------------------
/lightllm/server/build_prompt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/build_prompt.py


--------------------------------------------------------------------------------
/lightllm/server/config_server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/config_server/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/config_server/api_http.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/config_server/api_http.py


--------------------------------------------------------------------------------
/lightllm/server/config_server/nccl_tcp_store.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/config_server/nccl_tcp_store.py


--------------------------------------------------------------------------------
/lightllm/server/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/core/objs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/atomic_array_lock.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/atomic_array_lock.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/atomic_lock.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/atomic_lock.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/io_objs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/io_objs/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/io_objs/group_req.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/io_objs/group_req.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/nixl_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/nixl_params.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/out_token_circlequeue.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/out_token_circlequeue.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/py_sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/py_sampling_params.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/req.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/req.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/rpc_shm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/rpc_shm.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/sampling_params.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/shm_array.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/shm_array.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/shm_objs_io_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/shm_objs_io_buffer.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/shm_req_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/shm_req_manager.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/start_args_type.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/start_args_type.py


--------------------------------------------------------------------------------
/lightllm/server/core/objs/token_chunck_hash_list.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/token_chunck_hash_list.py


--------------------------------------------------------------------------------
/lightllm/server/detokenization/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/detokenization/decode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/decode.py


--------------------------------------------------------------------------------
/lightllm/server/detokenization/decode_mode_fix.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/decode_mode_fix.py


--------------------------------------------------------------------------------
/lightllm/server/detokenization/decode_req.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/decode_req.py


--------------------------------------------------------------------------------
/lightllm/server/detokenization/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/manager.py


--------------------------------------------------------------------------------
/lightllm/server/embed_cache/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/embed_cache/impl/__init__.py:
--------------------------------------------------------------------------------
1 | from . import naive_memory_cache


--------------------------------------------------------------------------------
/lightllm/server/embed_cache/impl/naive_memory_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/impl/naive_memory_cache.py


--------------------------------------------------------------------------------
/lightllm/server/embed_cache/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/manager.py


--------------------------------------------------------------------------------
/lightllm/server/embed_cache/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/utils.py


--------------------------------------------------------------------------------
/lightllm/server/function_call_parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/function_call_parser.py


--------------------------------------------------------------------------------
/lightllm/server/health_monitor/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/health_monitor/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/health_monitor/manager.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/httpserver/async_queue.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver/async_queue.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver/manager.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver/pd_loop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver/pd_loop.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver_for_pd_master/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/httpserver_for_pd_master/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/manager.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver_for_pd_master/pd_selector/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/pd_selector/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver_for_pd_master/pd_selector/pd_selector.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/pd_selector/pd_selector.py


--------------------------------------------------------------------------------
/lightllm/server/httpserver_for_pd_master/register_loop.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/register_loop.py


--------------------------------------------------------------------------------
/lightllm/server/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/metrics/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/metrics/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/metrics/manager.py


--------------------------------------------------------------------------------
/lightllm/server/metrics/metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/metrics/metrics.py


--------------------------------------------------------------------------------
/lightllm/server/multi_level_kv_cache/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/multi_level_kv_cache/cpu_cache_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/cpu_cache_client.py


--------------------------------------------------------------------------------
/lightllm/server/multi_level_kv_cache/disk_cache_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/disk_cache_worker.py


--------------------------------------------------------------------------------
/lightllm/server/multi_level_kv_cache/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/manager.py


--------------------------------------------------------------------------------
/lightllm/server/multi_level_kv_cache/shm_objs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/shm_objs.py


--------------------------------------------------------------------------------
/lightllm/server/multimodal_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multimodal_params.py


--------------------------------------------------------------------------------
/lightllm/server/pd_io_struct.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/pd_io_struct.py


--------------------------------------------------------------------------------
/lightllm/server/req_id_generator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/req_id_generator.py


--------------------------------------------------------------------------------
/lightllm/server/router/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/batch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/batch.py


--------------------------------------------------------------------------------
/lightllm/server/router/dynamic_prompt/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/dynamic_prompt/radix_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/dynamic_prompt/radix_cache.py


--------------------------------------------------------------------------------
/lightllm/server/router/dynamic_prompt/shared_arr.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/dynamic_prompt/shared_arr.py


--------------------------------------------------------------------------------
/lightllm/server/router/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/manager.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/infer_batch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/infer_batch.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/base_backend.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/base_backend.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/chunked_prefill/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/chunked_prefill/control_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/control_state.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_reward_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_reward_model.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_token_healing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_token_healing.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/continues_batch/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/p2p_fix.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/p2p_fix.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/task_queue.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/task_queue.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/utils.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/diverse_backend/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/diverse_backend/impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/diverse_backend/impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/dp_backend/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/dp_backend/control_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/dp_backend/control_state.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/dp_backend/dp_shared_kv_trans.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/dp_backend/dp_shared_kv_trans.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/generic_padded_pre_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/generic_padded_pre_process.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/generic_post_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/generic_post_process.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/generic_pre_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/generic_pre_process.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/mtp_pre_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/mtp_pre_process.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/multi_level_kv_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/multi_level_kv_cache.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/overlap_events.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/overlap_events.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/base_kv_move_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/base_kv_move_manager.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl_for_dp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl_for_dp.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/up_status.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/up_status.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/nixl_kv_transporter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/nixl_kv_transporter.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pd_nixl/trans_process_obj.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/trans_process_obj.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/pre.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pre.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/mode_backend/redundancy_expert_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/redundancy_expert_manager.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/model_rpc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/model_rpc.py


--------------------------------------------------------------------------------
/lightllm/server/router/model_infer/pin_mem_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/pin_mem_manager.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/base_queue.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/base_queue.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/chunked_prefill/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/chunked_prefill/beam_impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/beam_impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/chunked_prefill/impl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/impl.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/chunked_prefill/impl_for_nixl_pd.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/impl_for_nixl_pd.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/chunked_prefill/impl_for_pd_decode.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/impl_for_pd_decode.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/dp_balancer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/__init__.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/dp_balancer/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/base.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/dp_balancer/bs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/bs.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/dp_balancer/roundrobin.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/roundrobin.py


--------------------------------------------------------------------------------
/lightllm/server/router/req_queue/dp_base_queue.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_base_queue.py


--------------------------------------------------------------------------------
/lightllm/server/router/stats.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/stats.py


--------------------------------------------------------------------------------
/lightllm/server/router/token_load.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/token_load.py


--------------------------------------------------------------------------------
/lightllm/server/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/tokenizer.py


--------------------------------------------------------------------------------
/lightllm/server/visualserver/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/visualserver/manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/visualserver/manager.py


--------------------------------------------------------------------------------
/lightllm/server/visualserver/model_infer/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/server/visualserver/model_infer/model_rpc.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/visualserver/model_infer/model_rpc.py


--------------------------------------------------------------------------------
/lightllm/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lightllm/utils/auto_shm_cleanup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/auto_shm_cleanup.py


--------------------------------------------------------------------------------
/lightllm/utils/config_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/config_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/custom_kernel_utis.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/custom_kernel_utis.py


--------------------------------------------------------------------------------
/lightllm/utils/device_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/device_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/dist_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/dist_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/envs_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/envs_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/error_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/error_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/graceful_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/graceful_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/health_check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/health_check.py


--------------------------------------------------------------------------------
/lightllm/utils/infer_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/infer_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/kv_cache_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/kv_cache_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/light_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/light_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/llm_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/llm_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/log_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/log_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/multimodal_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/multimodal_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/multinode_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/multinode_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/net_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/net_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/petrel_helper.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/petrel_helper.py


--------------------------------------------------------------------------------
/lightllm/utils/process_check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/process_check.py


--------------------------------------------------------------------------------
/lightllm/utils/profile_max_tokens.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/profile_max_tokens.py


--------------------------------------------------------------------------------
/lightllm/utils/retry_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/retry_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/rpyc_fix_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/rpyc_fix_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/sgl_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/sgl_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/shm_size_check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/shm_size_check.py


--------------------------------------------------------------------------------
/lightllm/utils/shm_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/shm_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/start_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/start_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/statics_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/statics_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/time_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/time_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/torch_ops_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/torch_ops_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/tuning_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/tuning_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/vllm_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/vllm_utils.py


--------------------------------------------------------------------------------
/lightllm/utils/watchdog_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/watchdog_utils.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/requirements.txt


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/setup.py


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_14b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_14b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_32b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_32b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_72b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_72b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_14b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_14b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_32b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_32b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_72b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_72b.json


--------------------------------------------------------------------------------
/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen3_30b.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen3_30b.json


--------------------------------------------------------------------------------
/test/advanced_config/mixed_quantization/llamacls-mix-down.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/mixed_quantization/llamacls-mix-down.yaml


--------------------------------------------------------------------------------
/test/advanced_config/redundancy_expert/test_redundancy_expert_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/redundancy_expert/test_redundancy_expert_config.json


--------------------------------------------------------------------------------
/test/benchmark/kernel/benchmark_fused_moe_triton.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/kernel/benchmark_fused_moe_triton.py


--------------------------------------------------------------------------------
/test/benchmark/service/benchmark_client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_client.py


--------------------------------------------------------------------------------
/test/benchmark/service/benchmark_mcq.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_mcq.py


--------------------------------------------------------------------------------
/test/benchmark/service/benchmark_prompt_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_prompt_cache.py


--------------------------------------------------------------------------------
/test/benchmark/service/benchmark_prompt_cache_multi_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_prompt_cache_multi_server.py


--------------------------------------------------------------------------------
/test/benchmark/service/benchmark_qps.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_qps.py


--------------------------------------------------------------------------------
/test/benchmark/service/benchmark_sharegpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_sharegpt.py


--------------------------------------------------------------------------------
/test/benchmark/static_inference/model_infer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/model_infer.py


--------------------------------------------------------------------------------
/test/benchmark/static_inference/model_infer_mtp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/model_infer_mtp.py


--------------------------------------------------------------------------------
/test/benchmark/static_inference/profile_demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/profile_demo.py


--------------------------------------------------------------------------------
/test/benchmark/static_inference/test_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/test_model.py


--------------------------------------------------------------------------------
/test/benchmark/static_inference/test_vit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/test_vit.py


--------------------------------------------------------------------------------
/test/chat_template/tool_chat_template_deepseekr1.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekr1.jinja


--------------------------------------------------------------------------------
/test/chat_template/tool_chat_template_deepseekv3.jinjia:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekv3.jinjia


--------------------------------------------------------------------------------
/test/chat_template/tool_chat_template_deepseekv31.jinja:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekv31.jinja


--------------------------------------------------------------------------------
/test/chat_template/tool_chat_template_deepseekv32.jinjia:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekv32.jinjia


--------------------------------------------------------------------------------
/test/compare_with_previous_commit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/compare_with_previous_commit.py


--------------------------------------------------------------------------------
/test/format_out/gomoku_game.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/gomoku_game.py


--------------------------------------------------------------------------------
/test/format_out/qabot.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/qabot.py


--------------------------------------------------------------------------------
/test/format_out/test_constraint_server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/test_constraint_server.py


--------------------------------------------------------------------------------
/test/format_out/test_demo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/test_demo.py


--------------------------------------------------------------------------------
/test/format_out/test_xgrammar_constraint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/test_xgrammar_constraint.py


--------------------------------------------------------------------------------
/test/kernel/deepseekv2_bmm_scaled_fp8_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv2_bmm_scaled_fp8_tuning.py


--------------------------------------------------------------------------------
/test/kernel/deepseekv2_gqa_decode_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv2_gqa_decode_tuning.py


--------------------------------------------------------------------------------
/test/kernel/deepseekv3_fp8_block_gemm_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv3_fp8_block_gemm_tuning.py


--------------------------------------------------------------------------------
/test/kernel/deepseekv3_rotary_emb_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv3_rotary_emb_tuning.py


--------------------------------------------------------------------------------
/test/kernel/fuse_moe_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/fuse_moe_tuning.py


--------------------------------------------------------------------------------
/test/kernel/llama_gqa_decode_vsm_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/llama_gqa_decode_vsm_tuning.py


--------------------------------------------------------------------------------
/test/kernel/llama_gqa_diverse_decode_stage1_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/llama_gqa_diverse_decode_stage1_tuning.py


--------------------------------------------------------------------------------
/test/kernel/moe_silu_and_mul_tuning_bf16.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/moe_silu_and_mul_tuning_bf16.py


--------------------------------------------------------------------------------
/test/kernel/moe_sum_reduce_tuning_bf16.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/moe_sum_reduce_tuning_bf16.py


--------------------------------------------------------------------------------
/test/start_scripts/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/README.md


--------------------------------------------------------------------------------
/test/start_scripts/draft.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/draft.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_node_ep_node0.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_ep_node0.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_node_ep_node1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_ep_node1.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_node_tp_node0.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_tp_node0.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_node_tp_node1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_tp_node1.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_pd_master.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_pd_master/config_server.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/config_server.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_pd_master/pd_decode.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_decode.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_pd_master/pd_master_1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_master_1.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_pd_master/pd_master_2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_master_2.sh


--------------------------------------------------------------------------------
/test/start_scripts/multi_pd_master/pd_prefill.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_prefill.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_node_ep.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_node_ep.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_node_tp.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_node_tp.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_node_tp_cpu_cache_enable.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_node_tp_cpu_cache_enable.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_pd_master/pd_decode.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_decode.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_pd_master/pd_master.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_master.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_pd_master/pd_nixl_decode.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_nixl_decode.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_pd_master/pd_nixl_prefill.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_nixl_prefill.sh


--------------------------------------------------------------------------------
/test/start_scripts/single_pd_master/pd_prefill.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_prefill.sh


--------------------------------------------------------------------------------
/test/test_api/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test.jpg


--------------------------------------------------------------------------------
/test/test_api/test_generate_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test_generate_api.py


--------------------------------------------------------------------------------
/test/test_api/test_multimodal_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test_multimodal_api.py


--------------------------------------------------------------------------------
/test/test_api/test_openai_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test_openai_api.py


--------------------------------------------------------------------------------
/tools/quick_launch_docker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/tools/quick_launch_docker.py


--------------------------------------------------------------------------------
/tools/resolve_ptx_version:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/tools/resolve_ptx_version


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_add_in_place.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_add_in_place.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_gen_decode_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_decode_params.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_gen_mtp_prefill_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_mtp_prefill_params.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_gen_prefill_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_prefill_params.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_gen_sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_sampling_params.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_multimodal_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_multimodal_emb.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_redundancy_topk_ids_repair.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_redundancy_topk_ids_repair.py


--------------------------------------------------------------------------------
/unit_tests/common/basemodel/triton_kernel/test_sp_pad_kernel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_sp_pad_kernel.py


--------------------------------------------------------------------------------
/unit_tests/common/fused_moe/test_deepep.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_deepep.py


--------------------------------------------------------------------------------
/unit_tests/common/fused_moe/test_grouped_fused_moe.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_grouped_fused_moe.py


--------------------------------------------------------------------------------
/unit_tests/common/fused_moe/test_grouped_fused_moe_speed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_grouped_fused_moe_speed.py


--------------------------------------------------------------------------------
/unit_tests/common/fused_moe/test_grouped_topk.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_grouped_topk.py


--------------------------------------------------------------------------------
/unit_tests/common/fused_moe/test_moe_silu_and_mul_mix_quant_ep.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_moe_silu_and_mul_mix_quant_ep.py


--------------------------------------------------------------------------------
/unit_tests/common/fused_moe/test_softmax_topk.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_softmax_topk.py


--------------------------------------------------------------------------------
/unit_tests/common/kv_trans_kernel/test_kv_trans_v2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/kv_trans_kernel/test_kv_trans_v2.py


--------------------------------------------------------------------------------
/unit_tests/common/kv_trans_kernel/test_nixl_kv_trans.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/kv_trans_kernel/test_nixl_kv_trans.py


--------------------------------------------------------------------------------
/unit_tests/common/quantization/test_fp8_scaled_mm_per_token.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/quantization/test_fp8_scaled_mm_per_token.py


--------------------------------------------------------------------------------
/unit_tests/models/deepseek2/test_destindex_copy_kv.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_destindex_copy_kv.py


--------------------------------------------------------------------------------
/unit_tests/models/deepseek2/test_destindex_copy_kv_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_destindex_copy_kv_fp8.py


--------------------------------------------------------------------------------
/unit_tests/models/deepseek2/test_gqa_flash_decoding.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_gqa_flash_decoding.py


--------------------------------------------------------------------------------
/unit_tests/models/deepseek2/test_gqa_flash_decoding_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_gqa_flash_decoding_fp8.py


--------------------------------------------------------------------------------
/unit_tests/models/deepseek2/test_repack_kv_index.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_repack_kv_index.py


--------------------------------------------------------------------------------
/unit_tests/models/deepseek2/test_rope_repeat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_rope_repeat.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_context_flashattention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_context_flashattention_nopad.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_context_flashattention_nopad_fa3_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_context_flashattention_nopad_fa3_fp8.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_context_flashattention_nopad_flashinfer_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_context_flashattention_nopad_flashinfer_fp8.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage1.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage1.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage2.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage3.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage3.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_token_attention_nopad.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_token_attention_nopad.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_token_attention_nopad_fa3_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_token_attention_nopad_fa3_fp8.py


--------------------------------------------------------------------------------
/unit_tests/models/llama/test_token_attention_nopad_flashinfer_fp8.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_token_attention_nopad_flashinfer_fp8.py


--------------------------------------------------------------------------------
/unit_tests/models/qwen2_vl/test_mrope.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/qwen2_vl/test_mrope.py


--------------------------------------------------------------------------------
/unit_tests/models/qwen2_vl/test_rotary_pos_emb.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/qwen2_vl/test_rotary_pos_emb.py


--------------------------------------------------------------------------------
/unit_tests/models/vit/test_flash_attention_forward.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/vit/test_flash_attention_forward.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_atomic_array_lock.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_atomic_array_lock.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_atomic_lock.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_atomic_lock.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_out_token_circlequeue.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_out_token_circlequeue.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_req.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_req.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_sampling_params.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_sampling_params.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_shm_array.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_shm_array.py


--------------------------------------------------------------------------------
/unit_tests/server/core/objs/test_shm_req_manager.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_shm_req_manager.py


--------------------------------------------------------------------------------
/unit_tests/server/router/dynamic_prompt/test_radix_cache.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/router/dynamic_prompt/test_radix_cache.py


--------------------------------------------------------------------------------
/unit_tests/utils/test_custom_kernel_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/utils/test_custom_kernel_utils.py


--------------------------------------------------------------------------------