├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ ├── docker-publish.yml │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets ├── att.gif ├── lightllm.drawio.png ├── logo.png └── logo_new.png ├── benchmark.md ├── demos ├── qa_server │ ├── __init__.py │ ├── chat_server.py │ ├── qabot.py │ └── templates │ │ └── chat.html └── readme.txt ├── docker ├── Dockerfile ├── Dockerfile.deepep ├── Dockerfile.nixl ├── Dockerfile.nixl.deepep ├── cuda_version_12.6.1 │ ├── Dockerfile │ ├── Dockerfile.deepep │ ├── Dockerfile.nixl │ └── Dockerfile.nixl.deepep └── cuda_version_12.8.0 │ ├── Dockerfile │ ├── Dockerfile.deepep │ ├── Dockerfile.nixl │ ├── Dockerfile.nixl.deepep │ └── Dockerfile.nixl.deepep.cache ├── docs ├── CN │ ├── .readthedocs.yaml │ ├── Makefile │ ├── README.md │ ├── make.bat │ ├── rebuild.sh │ ├── requirements-docs.txt │ └── source │ │ ├── assets │ │ ├── lightllm │ │ │ ├── ER1.png │ │ │ ├── ER2.png │ │ │ ├── ER3.png │ │ │ ├── ER4.png │ │ │ ├── HttpServer.png │ │ │ ├── Performance.png │ │ │ ├── Performance2.png │ │ │ ├── Router.png │ │ │ ├── Visual_Server.png │ │ │ ├── arch.png │ │ │ ├── backend.png │ │ │ └── token_attn.gif │ │ └── logos │ │ │ └── lightllm-logo.png │ │ ├── conf.py │ │ ├── framework │ │ ├── framework.rst │ │ ├── router.rst │ │ └── token_attention.rst │ │ ├── getting_started │ │ ├── benchmark.rst │ │ ├── installation.rst │ │ └── quickstart.rst │ │ ├── index.rst │ │ ├── models │ │ ├── add_new_model.md │ │ └── supported_models.rst │ │ └── tutorial │ │ ├── api_param.rst │ │ ├── api_server_args_zh.rst │ │ ├── deepseek_deployment.rst │ │ ├── multi_level_cache_deployment.rst │ │ ├── multimodal.rst │ │ ├── openai.rst │ │ └── reward_model.rst └── EN │ ├── .readthedocs.yaml │ ├── Makefile │ ├── README.md │ ├── make.bat │ ├── rebuild.sh │ ├── requirements-docs.txt │ └── source │ ├── assets │ ├── lightllm │ │ ├── ER1.png │ │ ├── ER2.png │ │ ├── ER3.png │ │ ├── ER4.png │ │ ├── HttpServer.png │ │ ├── Performance.png │ │ ├── Performance2.png │ │ ├── Router.png │ │ ├── Visual_Server.png │ │ ├── arch.png │ │ ├── backend.png │ │ └── token_attn.gif │ └── logos │ │ └── lightllm-logo.png │ ├── conf.py │ ├── framework │ ├── framework.rst │ ├── router.rst │ └── token_attention.rst │ ├── getting_started │ ├── benchmark.rst │ ├── installation.rst │ └── quickstart.rst │ ├── index.rst │ ├── models │ ├── add_new_model.md │ └── supported_models.rst │ └── tutorial │ ├── api_param.rst │ ├── api_server_args_zh.rst │ ├── deepseek_deployment.rst │ ├── multi_level_cache_deployment.rst │ ├── multimodal.rst │ ├── openai.rst │ └── reward_model.rst ├── format.py ├── format_out ├── __init__.py ├── grammer │ ├── __init__.py │ ├── core.py │ ├── dpda.py │ ├── json.ebnf │ ├── test.sh │ ├── test0.py │ ├── test1.py │ ├── test2.py │ ├── test3.py │ ├── test4.py │ ├── test5.py │ └── test6.py └── impl.py ├── lightllm ├── __init__.py ├── common │ ├── __init__.py │ ├── all_kernel_configs │ │ ├── __init__.py │ │ ├── _fwd_kernel_flash_decode_diverse_stage1:v1 │ │ │ ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=16,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=2,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=4,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=5,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.bfloat16,q_head_dim=128}_NVIDIA_H200.json │ │ │ ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ ├── {block_seq=256,gqa_group_size=8,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_GeForce_RTX_5090.json │ │ │ └── {block_seq=256,gqa_group_size=8,out_dtype=torch.float16,q_head_dim=128}_NVIDIA_H200.json │ │ ├── bmm_scaled_fp8 │ │ │ ├── {B=16,K=128,M=1,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=1,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=128,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=128,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=16,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=16,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=2,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=2,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=256,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=256,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=32,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=32,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=4,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=4,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=512,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=512,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=64,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=64,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=128,M=8,N=512,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=128,M=8,N=512,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=1,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=1,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=128,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=128,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=16,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=16,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=2,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=2,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=256,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=256,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=32,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=32,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=4,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=4,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=512,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=512,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=64,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {B=16,K=512,M=64,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {B=16,K=512,M=8,N=128,out_dtype=torch.bfloat16}_NVIDIA_H100_80GB_HBM3.json │ │ │ └── {B=16,K=512,M=8,N=128,out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ ├── deepseek_v3_rotary_emb_kernel │ │ │ ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=128,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=16,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=32,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=64,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ └── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=8,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ ├── fp8_block_mm │ │ │ ├── {K=1024,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=1024,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=1152,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=1152,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=1536,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=1536,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=16384,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=16384,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=18432,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=18432,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=2048,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=2048,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=2304,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=2304,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=256,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=256,N=7168,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=512,N=2048,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=512,N=2048,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=512,N=32768,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=512,N=32768,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=512,N=4096,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=512,N=4096,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=1536,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=2304,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=2304,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=24576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=24576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=256,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=256,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=36864,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=36864,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=512,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=512,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=576,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=8072,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ └── {K=7168,N=8072,block_size=[128,128],out_dtype=torch.bfloat16}_NVIDIA_H800.json │ │ ├── grouped_moe_gemm_kernel │ │ │ ├── {K=128,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=128,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=1408,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=192,N=5120,expert_num=160,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H800.json │ │ │ ├── {K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_H800.json │ │ │ ├── {K=2048,N=192,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ ├── {K=2048,N=2816,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=256,N=7168,expert_num=257,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=4096,N=192,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=512,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=5120,N=384,expert_num=160,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=true}_NVIDIA_H800.json │ │ │ ├── {K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H800.json │ │ │ ├── {K=7168,N=1024,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=1024,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=256,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=7168,N=256,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H100_80GB_HBM3.json │ │ │ ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=7168,N=512,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ ├── {K=96,N=2048,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ └── {K=96,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H100_80GB_HBM3.json │ │ ├── mla_decode_attentnion │ │ │ ├── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=128,q_rope_dim=64}_NVIDIA_H800.json │ │ │ ├── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=16,q_rope_dim=64}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=16,q_rope_dim=64}_NVIDIA_H200.json │ │ │ └── {out_dtype=torch.bfloat16,q_head_dim=512,q_head_num=16,q_rope_dim=64}_NVIDIA_H800.json │ │ ├── moe_silu_and_mul_kernel │ │ │ ├── {N=1024,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=128,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=1408,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=192,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=2048,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=2304,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=256,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=4096,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ ├── {N=512,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ └── {N=8192,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ └── moe_sum_reduce_kernel │ │ │ ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=1024,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=2048,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=5120,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_A800-SXM4-80GB.json │ │ │ ├── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=1}_NVIDIA_H800.json │ │ │ ├── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_A800-SXM4-80GB.json │ │ │ └── {hidden_dim=8192,out_dtype=torch.bfloat16,topk_num=6}_NVIDIA_H800.json │ ├── basemodel │ │ ├── __init__.py │ │ ├── basemodel.py │ │ ├── batch_objs.py │ │ ├── cuda_graph.py │ │ ├── infer_lock.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── base_layer_infer.py │ │ │ ├── cache_tensor_manager.py │ │ │ ├── post_layer_infer.py │ │ │ ├── pre_layer_infer.py │ │ │ ├── template │ │ │ │ ├── __init__.py │ │ │ │ ├── post_layer_infer_template.py │ │ │ │ ├── pre_layer_infer_template.py │ │ │ │ ├── transformer_layer_infer_cohere_template.py │ │ │ │ └── transformer_layer_infer_template.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── base_layer_weight.py │ │ │ ├── hf_load_utils.py │ │ │ ├── meta_weights │ │ │ │ ├── __init__.py │ │ │ │ ├── base_weight.py │ │ │ │ ├── fused_moe_weight_ep.py │ │ │ │ ├── fused_moe_weight_ep_redundancy.py │ │ │ │ ├── fused_moe_weight_tp.py │ │ │ │ ├── gpt_oss_fused_moe_weight_tp.py │ │ │ │ ├── mm_weight │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── colmm_weight.py │ │ │ │ │ ├── mm_factory.py │ │ │ │ │ ├── mm_slicer.py │ │ │ │ │ ├── mm_weight.py │ │ │ │ │ └── rowmm_weight.py │ │ │ │ └── norm_weight.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── multimodal_tokenizer.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── add_in_place.py │ │ │ ├── apply_penalty.py │ │ │ ├── apply_penalty_gpu_cache.py │ │ │ ├── bmm_scaled_fp8.py │ │ │ ├── copy_kv_index_to_req.py │ │ │ ├── dequantize_gemm_int4.py │ │ │ ├── dequantize_gemm_int8.py │ │ │ ├── destindex_copy_kv.py │ │ │ ├── destindex_copy_kv_fp8.py │ │ │ ├── fa3_utils.py │ │ │ ├── gather_token_id.py │ │ │ ├── gen_decode_params.py │ │ │ ├── gen_mtp_prefill_params.py │ │ │ ├── gen_prefill_params.py │ │ │ ├── gen_sampling_params.py │ │ │ ├── kv_cache_offload.py │ │ │ ├── mtp_utils.py │ │ │ ├── multimodal_emb.py │ │ │ ├── q_per_head_fp8_quant.py │ │ │ ├── quantize_gemm_int8.py │ │ │ ├── redundancy_topk_ids_repair.py │ │ │ └── sp_pad_copy.py │ ├── build_utils.py │ ├── cuda_wrapper.py │ ├── fused_moe │ │ ├── __init__.py │ │ ├── deepep_scatter_gather.py │ │ ├── grouped_fused_moe.py │ │ ├── grouped_fused_moe_ep.py │ │ ├── grouped_topk.py │ │ ├── moe_kernel_configs.py │ │ ├── moe_silu_and_mul.py │ │ ├── moe_silu_and_mul_config.py │ │ ├── moe_silu_and_mul_mix_quant_ep.py │ │ ├── moe_sum_recude_config.py │ │ ├── moe_sum_reduce.py │ │ ├── softmax_topk.py │ │ └── topk_select.py │ ├── infer_utils.py │ ├── kernel_config.py │ ├── kv_cache_mem_manager │ │ ├── __init__.py │ │ ├── calibration_fp8kv_mem_manager.py │ │ ├── deepseek2_fp8kv_mem_manager.py │ │ ├── deepseek2_mem_manager.py │ │ ├── export_calibration_mem_manager.py │ │ ├── int8kv_mem_manager.py │ │ ├── mem_manager.py │ │ ├── mem_utils.py │ │ ├── offline_fp8_quant_mem_manager.py │ │ ├── ppl_int4kv_mem_manager.py │ │ └── ppl_int8kv_mem_manager.py │ ├── kv_trans_kernel │ │ ├── __init__.py │ │ ├── kv_trans.py │ │ ├── kv_trans_v2.py │ │ └── nixl_kv_trans.py │ ├── quantization │ │ ├── __init__.py │ │ ├── awq_quant.py │ │ ├── deepgemm_quant.py │ │ ├── quantize_method.py │ │ ├── registry.py │ │ ├── torchao_quant.py │ │ ├── triton_quant │ │ │ ├── __init__.py │ │ │ ├── fp8 │ │ │ │ ├── __init__.py │ │ │ │ ├── fp8act_quant_kernel.py │ │ │ │ ├── fp8w8a8_block_gemm_kernel.py │ │ │ │ ├── fp8w8a8_block_quant_kernel.py │ │ │ │ └── fp8w8a8_scaled_mm_per_token_kernel.py │ │ │ └── triton_quant.py │ │ └── w8a8_quant.py │ ├── req_manager.py │ └── triton_utils │ │ ├── __init__.py │ │ ├── autotune_kernel_configs │ │ ├── triton_3.3.1 │ │ │ └── NVIDIA_H200 │ │ │ │ ├── grouped_matmul:v1 │ │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── {K=256,N=7168,expert_num=257,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── {K=384,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── {K=4096,N=768,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ │ ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ └── {K=7168,N=512,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=9,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── moe_sum_reduce:v1 │ │ │ │ ├── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H200.json │ │ │ │ ├── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H200.json │ │ │ │ └── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=9}_NVIDIA_H200.json │ │ │ │ ├── rotary_emb_fwd:v1 │ │ │ │ ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=128,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ └── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=16,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ └── silu_and_mul_fwd:v1 │ │ │ │ ├── {N=1536,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=18432,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=192,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=2048,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=2304,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=256,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ └── {N=384,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ ├── triton_3.4.0 │ │ │ ├── NVIDIA_H20 │ │ │ │ ├── grouped_matmul:v1 │ │ │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H20.json │ │ │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H20.json │ │ │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H20.json │ │ │ │ │ └── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H20.json │ │ │ │ ├── moe_align_fused:v1 │ │ │ │ │ └── {topk_num=8}_NVIDIA_H20.json │ │ │ │ ├── moe_sum_reduce:v1 │ │ │ │ │ └── {hidden_dim=4096,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H20.json │ │ │ │ └── silu_and_mul_fwd:v1 │ │ │ │ │ ├── {N=1536,out_dtype=torch.bfloat16}_NVIDIA_H20.json │ │ │ │ │ └── {N=192,out_dtype=torch.bfloat16}_NVIDIA_H20.json │ │ │ └── NVIDIA_H200 │ │ │ │ ├── grouped_matmul:v1 │ │ │ │ ├── {K=192,N=4096,expert_num=128,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ │ ├── {K=256,N=7168,expert_num=256,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── {K=256,N=7168,expert_num=257,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── {K=4096,N=384,expert_num=128,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=false}_NVIDIA_H200.json │ │ │ │ ├── {K=7168,N=512,expert_num=256,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=8,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ └── {K=7168,N=512,expert_num=257,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=9,use_fp8_w8a8=true}_NVIDIA_H200.json │ │ │ │ ├── moe_align_fused:v1 │ │ │ │ ├── {topk_num=8}_NVIDIA_H200.json │ │ │ │ └── {topk_num=9}_NVIDIA_H200.json │ │ │ │ ├── moe_sum_reduce:v1 │ │ │ │ ├── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=8}_NVIDIA_H200.json │ │ │ │ └── {hidden_dim=7168,out_dtype=torch.bfloat16,topk_num=9}_NVIDIA_H200.json │ │ │ │ ├── rotary_emb_fwd:v1 │ │ │ │ ├── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=128,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ └── {HEAD_DIM=64,K_HEAD_NUM=1,Q_HEAD_NUM=16,dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ └── silu_and_mul_fwd:v1 │ │ │ │ ├── {N=18432,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=2048,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ ├── {N=2304,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ │ │ └── {N=256,out_dtype=torch.bfloat16}_NVIDIA_H200.json │ │ └── triton_3.5.1 │ │ │ ├── NVIDIA_GeForce_RTX_4090_D │ │ │ └── fp8_scaled_mm_per_token:v3 │ │ │ │ ├── {K=14336,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ │ ├── {K=4096,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ │ ├── {K=5120,N=2048,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ │ ├── {K=5120,N=28672,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ │ └── {K=5120,N=4096,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_4090_D.json │ │ │ └── NVIDIA_GeForce_RTX_5090 │ │ │ ├── fp8_scaled_mm_per_token:v3 │ │ │ ├── {K=13824,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=14336,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=1536,N=1536,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=1536,N=8960,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=4096,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=5120,N=13824,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=5120,N=2048,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=5120,N=28672,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=5120,N=4096,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=5120,N=5120,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ ├── {K=8960,N=1536,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ └── {N=14336,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ │ └── silu_and_mul_fwd:v1 │ │ │ └── {N=14336,out_dtype=torch.bfloat16}_NVIDIA_GeForce_RTX_5090.json │ │ └── autotuner.py ├── distributed │ ├── __init__.py │ ├── communication_op.py │ ├── custom_all_gather.py │ ├── custom_all_reduce.py │ ├── pynccl.py │ └── pynccl_wrapper.py ├── models │ ├── __init__.py │ ├── bloom │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── post_layer_infer.py │ │ │ ├── pre_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── hf_load_utils.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── context_flashattention_nopad.py │ │ │ ├── layernorm.py │ │ │ ├── token_attention_nopad_att1.py │ │ │ ├── token_attention_nopad_reduceV.py │ │ │ ├── token_attention_nopad_softmax.py │ │ │ └── token_flashattention_nopad.py │ ├── chatglm2 │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ └── rotary_emb.py │ ├── cohere │ │ ├── __init__.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── post_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernels │ │ │ ├── __init__.py │ │ │ ├── layernorm.py │ │ │ └── rotary_emb.py │ ├── deepseek2 │ │ ├── __init__.py │ │ ├── flashattention_infer_struct.py │ │ ├── flashinfer_struct.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── context_flashattention_nopad.py │ │ │ ├── context_flashattention_nopad_fp8.py │ │ │ ├── context_flashattention_nopad_with_v.py │ │ │ ├── destindex_copy_kv.py │ │ │ ├── destindex_copy_kv_fp8.py │ │ │ ├── gqa_flash_decoding.py │ │ │ ├── gqa_flash_decoding_config.py │ │ │ ├── gqa_flash_decoding_fp8.py │ │ │ ├── gqa_flash_decoding_stage1.py │ │ │ ├── gqa_flash_decoding_stage1_fp8.py │ │ │ ├── gqa_flash_decoding_stage2.py │ │ │ ├── repack_kv_index.py │ │ │ ├── repeat_rope.py │ │ │ ├── rotary_emb.py │ │ │ ├── rotary_emb_config.py │ │ │ ├── sample_kv.py │ │ │ └── weight_dequant.py │ ├── deepseek_mtp │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── pre_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── pre_and_post_layer_weight.py │ │ └── model.py │ ├── gemma3 │ │ ├── __init__.py │ │ ├── gemma3_visual.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── post_layer_infer.py │ │ │ ├── pre_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── gemma_2b │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── pre_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ └── gelu_and_mul.py │ ├── gpt_oss │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── internlm │ │ ├── __init__.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── internlm2 │ │ ├── __init__.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── internlm2_reward │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── post_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── pre_and_post_layer_weight.py │ │ └── model.py │ ├── internvl │ │ ├── __init__.py │ │ ├── img_process.py │ │ ├── internvl_visual.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── pre_and_post_layer_weight.py │ │ └── model.py │ ├── llama │ │ ├── __init__.py │ │ ├── flashattention_infer_struct.py │ │ ├── flashinfer_struct.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── post_layer_infer.py │ │ │ ├── pre_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── ds_load_utils.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ ├── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── context_flashattention_nopad.py │ │ │ ├── embedding.py │ │ │ ├── flash_decoding.py │ │ │ ├── flash_decoding_stage1.py │ │ │ ├── flash_decoding_stage2.py │ │ │ ├── gqa_decode_flashattention_nopad.py │ │ │ ├── gqa_flash_decoding.py │ │ │ ├── gqa_flash_decoding_stage1.py │ │ │ ├── gqa_flash_decoding_stage2.py │ │ │ ├── gqa_flash_decoding_vsm.py │ │ │ ├── ppl_fp16_flash_decoding.py │ │ │ ├── ppl_int4kv_copy_kv.py │ │ │ ├── ppl_int4kv_flash_decoding.py │ │ │ ├── ppl_int8kv_flash_decoding.py │ │ │ ├── ppl_int8kv_flash_decoding_diverse.py │ │ │ ├── ppl_int8kv_flash_decoding_diverse_stage1.py │ │ │ ├── ppl_int8kv_flash_decoding_diverse_stage3.py │ │ │ ├── ppl_quant_copy_kv.py │ │ │ ├── rmsnorm.py │ │ │ ├── rotary_emb.py │ │ │ ├── silu_and_mul.py │ │ │ ├── token_attention_nopad_att1.py │ │ │ ├── token_attention_nopad_reduceV.py │ │ │ ├── token_attention_nopad_softmax.py │ │ │ └── token_attention_softmax_and_reducev.py │ │ └── yarn_rotary_utils.py │ ├── llava │ │ ├── __init__.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── pre_and_post_layer_weight.py │ │ ├── llava_visual.py │ │ └── model.py │ ├── minicpm │ │ ├── __init__.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── mistral │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── context_flashattention_nopad.py │ │ │ ├── init_att_sliding_window_info.py │ │ │ ├── token_attention_nopad_att1.py │ │ │ ├── token_attention_nopad_reduceV.py │ │ │ └── token_attention_softmax_and_reducev.py │ ├── mixtral │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── _custom_ops.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── phi3 │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── context_flashattention_nopad.py │ │ │ ├── destindex_copy_kv.py │ │ │ ├── flash_decoding.py │ │ │ ├── flash_decoding_stage1.py │ │ │ ├── flash_decoding_stage2.py │ │ │ └── rotary_emb.py │ ├── qwen │ │ ├── __init__.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── qwen2 │ │ ├── __init__.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── qwen2_5_vl │ │ ├── __init__.py │ │ └── qwen2_5_visual.py │ ├── qwen2_reward │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── post_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── pre_and_post_layer_weight.py │ │ └── model.py │ ├── qwen2_vl │ │ ├── __init__.py │ │ ├── flashattention_infer_struct.py │ │ ├── infer_struct.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── model.py │ │ ├── qwen2_visual.py │ │ ├── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── mrope.py │ │ │ └── rotary_pos_emb.py │ │ └── vision_process.py │ ├── qwen3 │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── qwen3_moe │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── qwen_vl │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── pre_layer_infer.py │ │ ├── model.py │ │ └── qwen_visual.py │ ├── registry.py │ ├── stablelm │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── starcoder │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── pre_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── starcoder2 │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ └── model.py │ ├── tarsier2 │ │ ├── __init__.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ └── pre_and_post_layer_weight.py │ │ ├── model.py │ │ └── tarsier2_visual.py │ ├── vit │ │ ├── __init__.py │ │ ├── layer_infer │ │ │ ├── __init__.py │ │ │ ├── post_layer_infer.py │ │ │ ├── pre_layer_infer.py │ │ │ └── transformer_layer_infer.py │ │ ├── layer_weights │ │ │ ├── __init__.py │ │ │ ├── hf_load_utils.py │ │ │ ├── pre_and_post_layer_weight.py │ │ │ └── transformer_layer_weight.py │ │ ├── model.py │ │ └── triton_kernel │ │ │ ├── __init__.py │ │ │ ├── flashattention_nopad.py │ │ │ ├── gelu_vit.py │ │ │ └── rms_norm_vit.py │ └── whisper │ │ ├── __init__.py │ │ ├── defaults.py │ │ ├── modeling_whisper.py │ │ └── whisper_audio.py ├── server │ ├── __init__.py │ ├── api_cli.py │ ├── api_http.py │ ├── api_lightllm.py │ ├── api_models.py │ ├── api_openai.py │ ├── api_server.py │ ├── api_start.py │ ├── api_tgi.py │ ├── audioserver │ │ ├── __init__.py │ │ ├── manager.py │ │ └── model_infer │ │ │ ├── __init__.py │ │ │ └── model_rpc.py │ ├── build_prompt.py │ ├── config_server │ │ ├── __init__.py │ │ ├── api_http.py │ │ └── nccl_tcp_store.py │ ├── core │ │ ├── __init__.py │ │ └── objs │ │ │ ├── __init__.py │ │ │ ├── atomic_array_lock.py │ │ │ ├── atomic_lock.py │ │ │ ├── io_objs │ │ │ ├── __init__.py │ │ │ └── group_req.py │ │ │ ├── nixl_params.py │ │ │ ├── out_token_circlequeue.py │ │ │ ├── py_sampling_params.py │ │ │ ├── req.py │ │ │ ├── rpc_shm.py │ │ │ ├── sampling_params.py │ │ │ ├── shm_array.py │ │ │ ├── shm_objs_io_buffer.py │ │ │ ├── shm_req_manager.py │ │ │ ├── start_args_type.py │ │ │ └── token_chunck_hash_list.py │ ├── detokenization │ │ ├── __init__.py │ │ ├── decode.py │ │ ├── decode_mode_fix.py │ │ ├── decode_req.py │ │ └── manager.py │ ├── embed_cache │ │ ├── __init__.py │ │ ├── impl │ │ │ ├── __init__.py │ │ │ └── naive_memory_cache.py │ │ ├── manager.py │ │ └── utils.py │ ├── function_call_parser.py │ ├── health_monitor │ │ ├── __init__.py │ │ └── manager.py │ ├── httpserver │ │ ├── __init__.py │ │ ├── async_queue.py │ │ ├── manager.py │ │ └── pd_loop.py │ ├── httpserver_for_pd_master │ │ ├── __init__.py │ │ ├── manager.py │ │ ├── pd_selector │ │ │ ├── __init__.py │ │ │ └── pd_selector.py │ │ └── register_loop.py │ ├── metrics │ │ ├── __init__.py │ │ ├── manager.py │ │ └── metrics.py │ ├── multi_level_kv_cache │ │ ├── __init__.py │ │ ├── cpu_cache_client.py │ │ ├── disk_cache_worker.py │ │ ├── manager.py │ │ └── shm_objs.py │ ├── multimodal_params.py │ ├── pd_io_struct.py │ ├── req_id_generator.py │ ├── router │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── dynamic_prompt │ │ │ ├── __init__.py │ │ │ ├── radix_cache.py │ │ │ └── shared_arr.py │ │ ├── manager.py │ │ ├── model_infer │ │ │ ├── __init__.py │ │ │ ├── infer_batch.py │ │ │ ├── mode_backend │ │ │ │ ├── __init__.py │ │ │ │ ├── base_backend.py │ │ │ │ ├── chunked_prefill │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── control_state.py │ │ │ │ │ ├── impl.py │ │ │ │ │ ├── impl_for_first_token_constraint_mode.py │ │ │ │ │ ├── impl_for_outlines_constraint_mode.py │ │ │ │ │ ├── impl_for_return_all_prompt_logprobs.py │ │ │ │ │ ├── impl_for_reward_model.py │ │ │ │ │ ├── impl_for_token_healing.py │ │ │ │ │ └── impl_for_xgrammar_mode.py │ │ │ │ ├── continues_batch │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── pd_mode │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── decode_node_impl │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── decode_impl.py │ │ │ │ │ │ ├── decode_impl_for_dp.py │ │ │ │ │ │ ├── decode_infer_rpyc.py │ │ │ │ │ │ ├── decode_kv_move_manager.py │ │ │ │ │ │ ├── decode_task_cache.py │ │ │ │ │ │ ├── decode_trans_obj.py │ │ │ │ │ │ ├── decode_trans_process.py │ │ │ │ │ │ └── up_status.py │ │ │ │ │ │ ├── p2p_fix.py │ │ │ │ │ │ ├── prefill_node_impl │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── prefill_impl.py │ │ │ │ │ │ ├── prefill_impl_for_dp.py │ │ │ │ │ │ ├── prefill_infer_rpyc.py │ │ │ │ │ │ ├── prefill_kv_move_manager.py │ │ │ │ │ │ ├── prefill_task_cache.py │ │ │ │ │ │ ├── prefill_trans_obj.py │ │ │ │ │ │ └── prefill_trans_process.py │ │ │ │ │ │ ├── task_queue.py │ │ │ │ │ │ └── utils.py │ │ │ │ ├── diverse_backend │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── impl.py │ │ │ │ ├── dp_backend │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── control_state.py │ │ │ │ │ ├── dp_shared_kv_trans.py │ │ │ │ │ └── impl.py │ │ │ │ ├── generic_padded_pre_process.py │ │ │ │ ├── generic_post_process.py │ │ │ │ ├── generic_pre_process.py │ │ │ │ ├── mtp_pre_process.py │ │ │ │ ├── multi_level_kv_cache.py │ │ │ │ ├── overlap_events.py │ │ │ │ ├── pd_nixl │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base_kv_move_manager.py │ │ │ │ │ ├── decode_node_impl │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── decode_impl.py │ │ │ │ │ │ ├── decode_impl_for_dp.py │ │ │ │ │ │ ├── decode_kv_move_manager.py │ │ │ │ │ │ ├── decode_trans_process.py │ │ │ │ │ │ └── up_status.py │ │ │ │ │ ├── nixl_kv_transporter.py │ │ │ │ │ ├── prefill_node_impl │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── prefill_impl.py │ │ │ │ │ │ ├── prefill_impl_for_dp.py │ │ │ │ │ │ ├── prefill_kv_move_manager.py │ │ │ │ │ │ └── prefill_trans_process.py │ │ │ │ │ └── trans_process_obj.py │ │ │ │ ├── pre.py │ │ │ │ └── redundancy_expert_manager.py │ │ │ ├── model_rpc.py │ │ │ └── pin_mem_manager.py │ │ ├── req_queue │ │ │ ├── __init__.py │ │ │ ├── base_queue.py │ │ │ ├── chunked_prefill │ │ │ │ ├── __init__.py │ │ │ │ ├── beam_impl.py │ │ │ │ ├── impl.py │ │ │ │ ├── impl_for_nixl_pd.py │ │ │ │ └── impl_for_pd_decode.py │ │ │ ├── dp_balancer │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── bs.py │ │ │ │ └── roundrobin.py │ │ │ └── dp_base_queue.py │ │ ├── stats.py │ │ └── token_load.py │ ├── tokenizer.py │ └── visualserver │ │ ├── __init__.py │ │ ├── manager.py │ │ └── model_infer │ │ ├── __init__.py │ │ └── model_rpc.py └── utils │ ├── __init__.py │ ├── auto_shm_cleanup.py │ ├── config_utils.py │ ├── custom_kernel_utis.py │ ├── device_utils.py │ ├── dist_utils.py │ ├── envs_utils.py │ ├── error_utils.py │ ├── graceful_utils.py │ ├── health_check.py │ ├── infer_utils.py │ ├── kv_cache_utils.py │ ├── light_utils.py │ ├── llm_utils.py │ ├── log_utils.py │ ├── multimodal_utils.py │ ├── multinode_utils.py │ ├── net_utils.py │ ├── petrel_helper.py │ ├── process_check.py │ ├── profile_max_tokens.py │ ├── retry_utils.py │ ├── rpyc_fix_utils.py │ ├── sgl_utils.py │ ├── shm_size_check.py │ ├── shm_utils.py │ ├── start_utils.py │ ├── statics_utils.py │ ├── time_utils.py │ ├── torch_ops_utils.py │ ├── tuning_utils.py │ ├── vllm_utils.py │ └── watchdog_utils.py ├── requirements.txt ├── setup.py ├── test ├── advanced_config │ ├── fp8_calibration_per_head │ │ ├── test_kv_cache_calib_per_head_qwen2.5_14b.json │ │ ├── test_kv_cache_calib_per_head_qwen2.5_32b.json │ │ ├── test_kv_cache_calib_per_head_qwen2.5_72b.json │ │ ├── test_kv_cache_calib_per_head_qwen3_235b.json │ │ └── test_kv_cache_calib_per_head_qwen3_30b.json │ ├── fp8_calibration_per_tensor │ │ ├── test_kv_cache_calib_per_tensor_qwen2.5_14b.json │ │ ├── test_kv_cache_calib_per_tensor_qwen2.5_32b.json │ │ ├── test_kv_cache_calib_per_tensor_qwen2.5_72b.json │ │ ├── test_kv_cache_calib_per_tensor_qwen3_235b.json │ │ └── test_kv_cache_calib_per_tensor_qwen3_30b.json │ ├── mixed_quantization │ │ └── llamacls-mix-down.yaml │ └── redundancy_expert │ │ └── test_redundancy_expert_config.json ├── benchmark │ ├── kernel │ │ └── benchmark_fused_moe_triton.py │ ├── service │ │ ├── benchmark_client.py │ │ ├── benchmark_mcq.py │ │ ├── benchmark_prompt_cache.py │ │ ├── benchmark_prompt_cache_multi_server.py │ │ ├── benchmark_qps.py │ │ └── benchmark_sharegpt.py │ └── static_inference │ │ ├── model_infer.py │ │ ├── model_infer_mtp.py │ │ ├── profile_demo.py │ │ ├── test_model.py │ │ └── test_vit.py ├── chat_template │ ├── tool_chat_template_deepseekr1.jinja │ ├── tool_chat_template_deepseekv3.jinjia │ ├── tool_chat_template_deepseekv31.jinja │ └── tool_chat_template_deepseekv32.jinjia ├── compare_with_previous_commit.py ├── format_out │ ├── gomoku_game.py │ ├── qabot.py │ ├── test_constraint_server.py │ ├── test_demo.py │ └── test_xgrammar_constraint.py ├── kernel │ ├── deepseekv2_bmm_scaled_fp8_tuning.py │ ├── deepseekv2_gqa_decode_tuning.py │ ├── deepseekv3_fp8_block_gemm_tuning.py │ ├── deepseekv3_rotary_emb_tuning.py │ ├── fuse_moe_tuning.py │ ├── llama_gqa_decode_vsm_tuning.py │ ├── llama_gqa_diverse_decode_stage1_tuning.py │ ├── moe_silu_and_mul_tuning_bf16.py │ └── moe_sum_reduce_tuning_bf16.py ├── start_scripts │ ├── README.md │ ├── draft.sh │ ├── multi_node_ep_node0.sh │ ├── multi_node_ep_node1.sh │ ├── multi_node_tp_node0.sh │ ├── multi_node_tp_node1.sh │ ├── multi_pd_master.sh │ ├── multi_pd_master │ │ ├── config_server.sh │ │ ├── pd_decode.sh │ │ ├── pd_master_1.sh │ │ ├── pd_master_2.sh │ │ └── pd_prefill.sh │ ├── single_node_ep.sh │ ├── single_node_tp.sh │ ├── single_node_tp_cpu_cache_enable.sh │ └── single_pd_master │ │ ├── pd_decode.sh │ │ ├── pd_master.sh │ │ ├── pd_nixl_decode.sh │ │ ├── pd_nixl_prefill.sh │ │ └── pd_prefill.sh └── test_api │ ├── test.jpg │ ├── test_generate_api.py │ ├── test_multimodal_api.py │ └── test_openai_api.py ├── tools ├── quick_launch_docker.py └── resolve_ptx_version └── unit_tests ├── common ├── basemodel │ └── triton_kernel │ │ ├── test_add_in_place.py │ │ ├── test_gen_decode_params.py │ │ ├── test_gen_mtp_prefill_params.py │ │ ├── test_gen_prefill_params.py │ │ ├── test_gen_sampling_params.py │ │ ├── test_multimodal_emb.py │ │ ├── test_redundancy_topk_ids_repair.py │ │ └── test_sp_pad_kernel.py ├── fused_moe │ ├── test_deepep.py │ ├── test_grouped_fused_moe.py │ ├── test_grouped_fused_moe_speed.py │ ├── test_grouped_topk.py │ ├── test_moe_silu_and_mul_mix_quant_ep.py │ └── test_softmax_topk.py ├── kv_trans_kernel │ ├── test_kv_trans_v2.py │ └── test_nixl_kv_trans.py └── quantization │ └── test_fp8_scaled_mm_per_token.py ├── models ├── deepseek2 │ ├── test_destindex_copy_kv.py │ ├── test_destindex_copy_kv_fp8.py │ ├── test_gqa_flash_decoding.py │ ├── test_gqa_flash_decoding_fp8.py │ ├── test_repack_kv_index.py │ └── test_rope_repeat.py ├── llama │ ├── test_context_flashattention_nopad.py │ ├── test_context_flashattention_nopad_fa3_fp8.py │ ├── test_context_flashattention_nopad_flashinfer_fp8.py │ ├── test_ppl_int8kv_flash_decoding_diverse.py │ ├── test_ppl_int8kv_flash_decoding_diverse_stage1.py │ ├── test_ppl_int8kv_flash_decoding_diverse_stage2.py │ ├── test_ppl_int8kv_flash_decoding_diverse_stage3.py │ ├── test_token_attention_nopad.py │ ├── test_token_attention_nopad_fa3_fp8.py │ └── test_token_attention_nopad_flashinfer_fp8.py ├── qwen2_vl │ ├── test_mrope.py │ └── test_rotary_pos_emb.py └── vit │ └── test_flash_attention_forward.py ├── server ├── core │ └── objs │ │ ├── test_atomic_array_lock.py │ │ ├── test_atomic_lock.py │ │ ├── test_out_token_circlequeue.py │ │ ├── test_req.py │ │ ├── test_sampling_params.py │ │ ├── test_shm_array.py │ │ └── test_shm_req_manager.py └── router │ └── dynamic_prompt │ └── test_radix_cache.py └── utils └── test_custom_kernel_utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.github/ISSUE_TEMPLATE/bug_report.md -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.github/workflows/docker-publish.yml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .pyc 3 | build 4 | dist 5 | *.egg-info 6 | .idea 7 | .vscode 8 | tmp/ 9 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/README.md -------------------------------------------------------------------------------- /assets/att.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/att.gif -------------------------------------------------------------------------------- /assets/lightllm.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/lightllm.drawio.png -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/logo_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/assets/logo_new.png -------------------------------------------------------------------------------- /benchmark.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/benchmark.md -------------------------------------------------------------------------------- /demos/qa_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demos/qa_server/chat_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/demos/qa_server/chat_server.py -------------------------------------------------------------------------------- /demos/qa_server/qabot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/demos/qa_server/qabot.py -------------------------------------------------------------------------------- /demos/qa_server/templates/chat.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/demos/qa_server/templates/chat.html -------------------------------------------------------------------------------- /demos/readme.txt: -------------------------------------------------------------------------------- 1 | 一些应用demo的目录 -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/Dockerfile.deepep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile.deepep -------------------------------------------------------------------------------- /docker/Dockerfile.nixl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile.nixl -------------------------------------------------------------------------------- /docker/Dockerfile.nixl.deepep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/Dockerfile.nixl.deepep -------------------------------------------------------------------------------- /docker/cuda_version_12.6.1/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile -------------------------------------------------------------------------------- /docker/cuda_version_12.6.1/Dockerfile.deepep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile.deepep -------------------------------------------------------------------------------- /docker/cuda_version_12.6.1/Dockerfile.nixl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile.nixl -------------------------------------------------------------------------------- /docker/cuda_version_12.6.1/Dockerfile.nixl.deepep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.6.1/Dockerfile.nixl.deepep -------------------------------------------------------------------------------- /docker/cuda_version_12.8.0/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile -------------------------------------------------------------------------------- /docker/cuda_version_12.8.0/Dockerfile.deepep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.deepep -------------------------------------------------------------------------------- /docker/cuda_version_12.8.0/Dockerfile.nixl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.nixl -------------------------------------------------------------------------------- /docker/cuda_version_12.8.0/Dockerfile.nixl.deepep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep -------------------------------------------------------------------------------- /docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docker/cuda_version_12.8.0/Dockerfile.nixl.deepep.cache -------------------------------------------------------------------------------- /docs/CN/.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/.readthedocs.yaml -------------------------------------------------------------------------------- /docs/CN/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/Makefile -------------------------------------------------------------------------------- /docs/CN/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/README.md -------------------------------------------------------------------------------- /docs/CN/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/make.bat -------------------------------------------------------------------------------- /docs/CN/rebuild.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/rebuild.sh -------------------------------------------------------------------------------- /docs/CN/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/requirements-docs.txt -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/ER1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER1.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/ER2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER2.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/ER3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER3.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/ER4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/ER4.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/HttpServer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/HttpServer.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/Performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Performance.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/Performance2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Performance2.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/Router.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Router.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/Visual_Server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/Visual_Server.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/arch.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/backend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/backend.png -------------------------------------------------------------------------------- /docs/CN/source/assets/lightllm/token_attn.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/lightllm/token_attn.gif -------------------------------------------------------------------------------- /docs/CN/source/assets/logos/lightllm-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/assets/logos/lightllm-logo.png -------------------------------------------------------------------------------- /docs/CN/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/conf.py -------------------------------------------------------------------------------- /docs/CN/source/framework/framework.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/framework/framework.rst -------------------------------------------------------------------------------- /docs/CN/source/framework/router.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/framework/router.rst -------------------------------------------------------------------------------- /docs/CN/source/framework/token_attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/framework/token_attention.rst -------------------------------------------------------------------------------- /docs/CN/source/getting_started/benchmark.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/getting_started/benchmark.rst -------------------------------------------------------------------------------- /docs/CN/source/getting_started/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/getting_started/installation.rst -------------------------------------------------------------------------------- /docs/CN/source/getting_started/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/getting_started/quickstart.rst -------------------------------------------------------------------------------- /docs/CN/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/index.rst -------------------------------------------------------------------------------- /docs/CN/source/models/add_new_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/models/add_new_model.md -------------------------------------------------------------------------------- /docs/CN/source/models/supported_models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/models/supported_models.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/api_param.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/api_param.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/api_server_args_zh.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/api_server_args_zh.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/deepseek_deployment.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/deepseek_deployment.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/multi_level_cache_deployment.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/multi_level_cache_deployment.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/multimodal.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/multimodal.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/openai.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/openai.rst -------------------------------------------------------------------------------- /docs/CN/source/tutorial/reward_model.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/CN/source/tutorial/reward_model.rst -------------------------------------------------------------------------------- /docs/EN/.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/.readthedocs.yaml -------------------------------------------------------------------------------- /docs/EN/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/Makefile -------------------------------------------------------------------------------- /docs/EN/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/README.md -------------------------------------------------------------------------------- /docs/EN/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/make.bat -------------------------------------------------------------------------------- /docs/EN/rebuild.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/rebuild.sh -------------------------------------------------------------------------------- /docs/EN/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/requirements-docs.txt -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/ER1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER1.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/ER2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER2.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/ER3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER3.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/ER4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/ER4.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/HttpServer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/HttpServer.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/Performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Performance.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/Performance2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Performance2.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/Router.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Router.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/Visual_Server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/Visual_Server.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/arch.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/backend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/backend.png -------------------------------------------------------------------------------- /docs/EN/source/assets/lightllm/token_attn.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/lightllm/token_attn.gif -------------------------------------------------------------------------------- /docs/EN/source/assets/logos/lightllm-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/assets/logos/lightllm-logo.png -------------------------------------------------------------------------------- /docs/EN/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/conf.py -------------------------------------------------------------------------------- /docs/EN/source/framework/framework.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/framework/framework.rst -------------------------------------------------------------------------------- /docs/EN/source/framework/router.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/framework/router.rst -------------------------------------------------------------------------------- /docs/EN/source/framework/token_attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/framework/token_attention.rst -------------------------------------------------------------------------------- /docs/EN/source/getting_started/benchmark.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/getting_started/benchmark.rst -------------------------------------------------------------------------------- /docs/EN/source/getting_started/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/getting_started/installation.rst -------------------------------------------------------------------------------- /docs/EN/source/getting_started/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/getting_started/quickstart.rst -------------------------------------------------------------------------------- /docs/EN/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/index.rst -------------------------------------------------------------------------------- /docs/EN/source/models/add_new_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/models/add_new_model.md -------------------------------------------------------------------------------- /docs/EN/source/models/supported_models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/models/supported_models.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/api_param.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/api_param.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/api_server_args_zh.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/api_server_args_zh.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/deepseek_deployment.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/deepseek_deployment.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/multi_level_cache_deployment.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/multi_level_cache_deployment.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/multimodal.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/multimodal.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/openai.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/openai.rst -------------------------------------------------------------------------------- /docs/EN/source/tutorial/reward_model.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/docs/EN/source/tutorial/reward_model.rst -------------------------------------------------------------------------------- /format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format.py -------------------------------------------------------------------------------- /format_out/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /format_out/grammer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/__init__.py -------------------------------------------------------------------------------- /format_out/grammer/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/core.py -------------------------------------------------------------------------------- /format_out/grammer/dpda.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/dpda.py -------------------------------------------------------------------------------- /format_out/grammer/json.ebnf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/json.ebnf -------------------------------------------------------------------------------- /format_out/grammer/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test.sh -------------------------------------------------------------------------------- /format_out/grammer/test0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test0.py -------------------------------------------------------------------------------- /format_out/grammer/test1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test1.py -------------------------------------------------------------------------------- /format_out/grammer/test2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test2.py -------------------------------------------------------------------------------- /format_out/grammer/test3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test3.py -------------------------------------------------------------------------------- /format_out/grammer/test4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test4.py -------------------------------------------------------------------------------- /format_out/grammer/test5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test5.py -------------------------------------------------------------------------------- /format_out/grammer/test6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/grammer/test6.py -------------------------------------------------------------------------------- /format_out/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/format_out/impl.py -------------------------------------------------------------------------------- /lightllm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/all_kernel_configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/basemodel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/__init__.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/basemodel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/basemodel.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/batch_objs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/batch_objs.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/cuda_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/cuda_graph.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/infer_lock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/infer_lock.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/infer_struct.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/base_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/base_layer_infer.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/cache_tensor_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/cache_tensor_manager.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/template/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/template/post_layer_infer_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/post_layer_infer_template.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/template/pre_layer_infer_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/pre_layer_infer_template.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_cohere_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_cohere_template.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/template/transformer_layer_infer_template.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/base_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/base_layer_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/hf_load_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/hf_load_utils.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/__init__.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/base_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/base_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep_redundancy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep_redundancy.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/gpt_oss_fused_moe_weight_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/gpt_oss_fused_moe_weight_tp.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/__init__.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/colmm_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/colmm_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_factory.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_slicer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_slicer.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/mm_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/rowmm_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight/rowmm_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/meta_weights/norm_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/meta_weights/norm_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/multimodal_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/multimodal_tokenizer.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/add_in_place.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/add_in_place.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/apply_penalty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/apply_penalty.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/apply_penalty_gpu_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/apply_penalty_gpu_cache.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/bmm_scaled_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/bmm_scaled_fp8.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/copy_kv_index_to_req.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/copy_kv_index_to_req.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/dequantize_gemm_int4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/dequantize_gemm_int4.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/dequantize_gemm_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/dequantize_gemm_int8.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/destindex_copy_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/destindex_copy_kv.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/destindex_copy_kv_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/destindex_copy_kv_fp8.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/fa3_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/fa3_utils.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/gather_token_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gather_token_id.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/gen_decode_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_decode_params.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/gen_mtp_prefill_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_mtp_prefill_params.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/gen_prefill_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_prefill_params.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/gen_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/gen_sampling_params.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/kv_cache_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/kv_cache_offload.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/mtp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/mtp_utils.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/multimodal_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/multimodal_emb.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/q_per_head_fp8_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/q_per_head_fp8_quant.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/quantize_gemm_int8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/quantize_gemm_int8.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/redundancy_topk_ids_repair.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/redundancy_topk_ids_repair.py -------------------------------------------------------------------------------- /lightllm/common/basemodel/triton_kernel/sp_pad_copy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/basemodel/triton_kernel/sp_pad_copy.py -------------------------------------------------------------------------------- /lightllm/common/build_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/build_utils.py -------------------------------------------------------------------------------- /lightllm/common/cuda_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/cuda_wrapper.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/fused_moe/deepep_scatter_gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/deepep_scatter_gather.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/grouped_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/grouped_fused_moe.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/grouped_fused_moe_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/grouped_fused_moe_ep.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/grouped_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/grouped_topk.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/moe_kernel_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_kernel_configs.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/moe_silu_and_mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_silu_and_mul.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/moe_silu_and_mul_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_silu_and_mul_config.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/moe_silu_and_mul_mix_quant_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_silu_and_mul_mix_quant_ep.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/moe_sum_recude_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_sum_recude_config.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/moe_sum_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/moe_sum_reduce.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/softmax_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/softmax_topk.py -------------------------------------------------------------------------------- /lightllm/common/fused_moe/topk_select.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/fused_moe/topk_select.py -------------------------------------------------------------------------------- /lightllm/common/infer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/infer_utils.py -------------------------------------------------------------------------------- /lightllm/common/kernel_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kernel_config.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/__init__.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/calibration_fp8kv_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/deepseek2_fp8kv_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/deepseek2_fp8kv_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/deepseek2_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/deepseek2_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/export_calibration_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/int8kv_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/int8kv_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/mem_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/mem_utils.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/offline_fp8_quant_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/ppl_int4kv_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/ppl_int4kv_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_cache_mem_manager/ppl_int8kv_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_cache_mem_manager/ppl_int8kv_mem_manager.py -------------------------------------------------------------------------------- /lightllm/common/kv_trans_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/kv_trans_kernel/kv_trans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_trans_kernel/kv_trans.py -------------------------------------------------------------------------------- /lightllm/common/kv_trans_kernel/kv_trans_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_trans_kernel/kv_trans_v2.py -------------------------------------------------------------------------------- /lightllm/common/kv_trans_kernel/nixl_kv_trans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/kv_trans_kernel/nixl_kv_trans.py -------------------------------------------------------------------------------- /lightllm/common/quantization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/__init__.py -------------------------------------------------------------------------------- /lightllm/common/quantization/awq_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/awq_quant.py -------------------------------------------------------------------------------- /lightllm/common/quantization/deepgemm_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/deepgemm_quant.py -------------------------------------------------------------------------------- /lightllm/common/quantization/quantize_method.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/quantize_method.py -------------------------------------------------------------------------------- /lightllm/common/quantization/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/registry.py -------------------------------------------------------------------------------- /lightllm/common/quantization/torchao_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/torchao_quant.py -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/fp8/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/fp8/fp8act_quant_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8act_quant_kernel.py -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_gemm_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_gemm_kernel.py -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_quant_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_quant_kernel.py -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/fp8/fp8w8a8_scaled_mm_per_token_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_scaled_mm_per_token_kernel.py -------------------------------------------------------------------------------- /lightllm/common/quantization/triton_quant/triton_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/triton_quant/triton_quant.py -------------------------------------------------------------------------------- /lightllm/common/quantization/w8a8_quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/quantization/w8a8_quant.py -------------------------------------------------------------------------------- /lightllm/common/req_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/req_manager.py -------------------------------------------------------------------------------- /lightllm/common/triton_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/common/triton_utils/autotuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/common/triton_utils/autotuner.py -------------------------------------------------------------------------------- /lightllm/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | from .communication_op import * 2 | -------------------------------------------------------------------------------- /lightllm/distributed/communication_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/communication_op.py -------------------------------------------------------------------------------- /lightllm/distributed/custom_all_gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/custom_all_gather.py -------------------------------------------------------------------------------- /lightllm/distributed/custom_all_reduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/custom_all_reduce.py -------------------------------------------------------------------------------- /lightllm/distributed/pynccl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/pynccl.py -------------------------------------------------------------------------------- /lightllm/distributed/pynccl_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/distributed/pynccl_wrapper.py -------------------------------------------------------------------------------- /lightllm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/__init__.py -------------------------------------------------------------------------------- /lightllm/models/bloom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_weights/hf_load_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_weights/hf_load_utils.py -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/bloom/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/bloom/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/model.py -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/context_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/layernorm.py -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/token_attention_nopad_att1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_attention_nopad_att1.py -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/token_attention_nopad_reduceV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_attention_nopad_reduceV.py -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/token_attention_nopad_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_attention_nopad_softmax.py -------------------------------------------------------------------------------- /lightllm/models/bloom/triton_kernel/token_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/bloom/triton_kernel/token_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/chatglm2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/chatglm2/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/chatglm2/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/chatglm2/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/chatglm2/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/chatglm2/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/chatglm2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/model.py -------------------------------------------------------------------------------- /lightllm/models/chatglm2/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/chatglm2/triton_kernel/rotary_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/chatglm2/triton_kernel/rotary_emb.py -------------------------------------------------------------------------------- /lightllm/models/cohere/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/cohere/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/cohere/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/cohere/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/cohere/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/cohere/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/cohere/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/cohere/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/cohere/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/model.py -------------------------------------------------------------------------------- /lightllm/models/cohere/triton_kernels/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/cohere/triton_kernels/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/triton_kernels/layernorm.py -------------------------------------------------------------------------------- /lightllm/models/cohere/triton_kernels/rotary_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/cohere/triton_kernels/rotary_emb.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek2/flashattention_infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/flashattention_infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/flashinfer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/flashinfer_struct.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/model.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_fp8.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_with_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/context_flashattention_nopad_with_v.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/destindex_copy_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/destindex_copy_kv.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/destindex_copy_kv_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/destindex_copy_kv_fp8.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_config.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_fp8.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1_fp8.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage2.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/repack_kv_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/repack_kv_index.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/repeat_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/repeat_rope.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/rotary_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/rotary_emb.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/rotary_emb_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/rotary_emb_config.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/sample_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/sample_kv.py -------------------------------------------------------------------------------- /lightllm/models/deepseek2/triton_kernel/weight_dequant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek2/triton_kernel/weight_dequant.py -------------------------------------------------------------------------------- /lightllm/models/deepseek_mtp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek_mtp/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek_mtp/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek_mtp/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/deepseek_mtp/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/deepseek_mtp/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek_mtp/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/deepseek_mtp/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/deepseek_mtp/model.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma3/gemma3_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/gemma3_visual.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/gemma3/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma3/model.py -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/model.py -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gemma_2b/triton_kernel/gelu_and_mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gemma_2b/triton_kernel/gelu_and_mul.py -------------------------------------------------------------------------------- /lightllm/models/gpt_oss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gpt_oss/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gpt_oss/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/gpt_oss/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/gpt_oss/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gpt_oss/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/gpt_oss/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/gpt_oss/model.py -------------------------------------------------------------------------------- /lightllm/models/internlm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/internlm/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm/model.py -------------------------------------------------------------------------------- /lightllm/models/internlm2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm2/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm2/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/internlm2/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/internlm2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2/model.py -------------------------------------------------------------------------------- /lightllm/models/internlm2_reward/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm2_reward/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm2_reward/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2_reward/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/internlm2_reward/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internlm2_reward/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2_reward/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/internlm2_reward/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internlm2_reward/model.py -------------------------------------------------------------------------------- /lightllm/models/internvl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internvl/img_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/img_process.py -------------------------------------------------------------------------------- /lightllm/models/internvl/internvl_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/internvl_visual.py -------------------------------------------------------------------------------- /lightllm/models/internvl/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/internvl/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/internvl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/internvl/model.py -------------------------------------------------------------------------------- /lightllm/models/llama/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/llama/flashattention_infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/flashattention_infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/llama/flashinfer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/flashinfer_struct.py -------------------------------------------------------------------------------- /lightllm/models/llama/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/llama/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/llama/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/llama/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/llama/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/llama/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/llama/layer_weights/ds_load_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_weights/ds_load_utils.py -------------------------------------------------------------------------------- /lightllm/models/llama/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/llama/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/llama/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/model.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/embedding.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/flash_decoding_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/flash_decoding_stage1.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/flash_decoding_stage2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/flash_decoding_stage2.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/gqa_decode_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_decode_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/gqa_flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage1.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding_stage2.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/gqa_flash_decoding_vsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/gqa_flash_decoding_vsm.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_fp16_flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_fp16_flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_int4kv_copy_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int4kv_copy_kv.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_int4kv_flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int4kv_flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage1.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_int8kv_flash_decoding_diverse_stage3.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/ppl_quant_copy_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/ppl_quant_copy_kv.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/rmsnorm.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/rotary_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/rotary_emb.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/silu_and_mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/silu_and_mul.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/token_attention_nopad_att1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_nopad_att1.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/token_attention_nopad_reduceV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_nopad_reduceV.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/token_attention_nopad_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_nopad_softmax.py -------------------------------------------------------------------------------- /lightllm/models/llama/triton_kernel/token_attention_softmax_and_reducev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/triton_kernel/token_attention_softmax_and_reducev.py -------------------------------------------------------------------------------- /lightllm/models/llama/yarn_rotary_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llama/yarn_rotary_utils.py -------------------------------------------------------------------------------- /lightllm/models/llava/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/llava/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/llava/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llava/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/llava/llava_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llava/llava_visual.py -------------------------------------------------------------------------------- /lightllm/models/llava/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/llava/model.py -------------------------------------------------------------------------------- /lightllm/models/minicpm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/minicpm/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/minicpm/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/minicpm/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/minicpm/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/minicpm/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/minicpm/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/minicpm/model.py -------------------------------------------------------------------------------- /lightllm/models/mistral/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/mistral/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/mistral/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/mistral/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/model.py -------------------------------------------------------------------------------- /lightllm/models/mistral/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/mistral/triton_kernel/context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/context_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/mistral/triton_kernel/init_att_sliding_window_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/init_att_sliding_window_info.py -------------------------------------------------------------------------------- /lightllm/models/mistral/triton_kernel/token_attention_nopad_att1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/token_attention_nopad_att1.py -------------------------------------------------------------------------------- /lightllm/models/mistral/triton_kernel/token_attention_nopad_reduceV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/token_attention_nopad_reduceV.py -------------------------------------------------------------------------------- /lightllm/models/mistral/triton_kernel/token_attention_softmax_and_reducev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mistral/triton_kernel/token_attention_softmax_and_reducev.py -------------------------------------------------------------------------------- /lightllm/models/mixtral/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/mixtral/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/mixtral/layer_infer/_custom_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/layer_infer/_custom_ops.py -------------------------------------------------------------------------------- /lightllm/models/mixtral/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/mixtral/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/mixtral/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/mixtral/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/mixtral/model.py -------------------------------------------------------------------------------- /lightllm/models/phi3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/phi3/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/phi3/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/phi3/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/phi3/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/phi3/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/model.py -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/context_flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/destindex_copy_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/destindex_copy_kv.py -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/flash_decoding.py -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/flash_decoding_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/flash_decoding_stage1.py -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/flash_decoding_stage2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/flash_decoding_stage2.py -------------------------------------------------------------------------------- /lightllm/models/phi3/triton_kernel/rotary_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/phi3/triton_kernel/rotary_emb.py -------------------------------------------------------------------------------- /lightllm/models/qwen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/qwen/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/qwen/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen2/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen2/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen2/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_5_vl/qwen2_5_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_5_vl/qwen2_5_visual.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_reward/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_reward/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_reward/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_reward/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_reward/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_reward/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_reward/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_reward/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_reward/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/flashattention_infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/flashattention_infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/infer_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/infer_struct.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/qwen2_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/qwen2_visual.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/triton_kernel/mrope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/triton_kernel/mrope.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/triton_kernel/rotary_pos_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/triton_kernel/rotary_pos_emb.py -------------------------------------------------------------------------------- /lightllm/models/qwen2_vl/vision_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen2_vl/vision_process.py -------------------------------------------------------------------------------- /lightllm/models/qwen3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen3/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen3/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/qwen3/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen3/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen3/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen3_moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen3_moe/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen3_moe/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3_moe/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/qwen3_moe/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen3_moe/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3_moe/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/qwen3_moe/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen3_moe/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen_vl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen_vl/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/qwen_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen_vl/model.py -------------------------------------------------------------------------------- /lightllm/models/qwen_vl/qwen_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/qwen_vl/qwen_visual.py -------------------------------------------------------------------------------- /lightllm/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/registry.py -------------------------------------------------------------------------------- /lightllm/models/stablelm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/stablelm/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/stablelm/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/stablelm/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/stablelm/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/stablelm/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/stablelm/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/stablelm/model.py -------------------------------------------------------------------------------- /lightllm/models/starcoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/starcoder/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/starcoder/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/starcoder/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/starcoder/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/starcoder/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/starcoder/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/starcoder/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder/model.py -------------------------------------------------------------------------------- /lightllm/models/starcoder2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/starcoder2/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/starcoder2/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/starcoder2/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/starcoder2/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/starcoder2/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/starcoder2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/starcoder2/model.py -------------------------------------------------------------------------------- /lightllm/models/tarsier2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/tarsier2/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/tarsier2/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/tarsier2/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/tarsier2/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/tarsier2/model.py -------------------------------------------------------------------------------- /lightllm/models/tarsier2/tarsier2_visual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/tarsier2/tarsier2_visual.py -------------------------------------------------------------------------------- /lightllm/models/vit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/__init__.py -------------------------------------------------------------------------------- /lightllm/models/vit/layer_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/vit/layer_infer/post_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_infer/post_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/vit/layer_infer/pre_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_infer/pre_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/vit/layer_infer/transformer_layer_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_infer/transformer_layer_infer.py -------------------------------------------------------------------------------- /lightllm/models/vit/layer_weights/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/vit/layer_weights/hf_load_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_weights/hf_load_utils.py -------------------------------------------------------------------------------- /lightllm/models/vit/layer_weights/pre_and_post_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_weights/pre_and_post_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/vit/layer_weights/transformer_layer_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/layer_weights/transformer_layer_weight.py -------------------------------------------------------------------------------- /lightllm/models/vit/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/model.py -------------------------------------------------------------------------------- /lightllm/models/vit/triton_kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/vit/triton_kernel/flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/triton_kernel/flashattention_nopad.py -------------------------------------------------------------------------------- /lightllm/models/vit/triton_kernel/gelu_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/triton_kernel/gelu_vit.py -------------------------------------------------------------------------------- /lightllm/models/vit/triton_kernel/rms_norm_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/vit/triton_kernel/rms_norm_vit.py -------------------------------------------------------------------------------- /lightllm/models/whisper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/models/whisper/defaults.py: -------------------------------------------------------------------------------- 1 | MIN_AUDIO_LEN = 480 # 最短音频长度 2 | -------------------------------------------------------------------------------- /lightllm/models/whisper/modeling_whisper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/whisper/modeling_whisper.py -------------------------------------------------------------------------------- /lightllm/models/whisper/whisper_audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/models/whisper/whisper_audio.py -------------------------------------------------------------------------------- /lightllm/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/__init__.py -------------------------------------------------------------------------------- /lightllm/server/api_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_cli.py -------------------------------------------------------------------------------- /lightllm/server/api_http.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_http.py -------------------------------------------------------------------------------- /lightllm/server/api_lightllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_lightllm.py -------------------------------------------------------------------------------- /lightllm/server/api_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_models.py -------------------------------------------------------------------------------- /lightllm/server/api_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_openai.py -------------------------------------------------------------------------------- /lightllm/server/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_server.py -------------------------------------------------------------------------------- /lightllm/server/api_start.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_start.py -------------------------------------------------------------------------------- /lightllm/server/api_tgi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/api_tgi.py -------------------------------------------------------------------------------- /lightllm/server/audioserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/audioserver/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/audioserver/manager.py -------------------------------------------------------------------------------- /lightllm/server/audioserver/model_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/audioserver/model_infer/model_rpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/audioserver/model_infer/model_rpc.py -------------------------------------------------------------------------------- /lightllm/server/build_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/build_prompt.py -------------------------------------------------------------------------------- /lightllm/server/config_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/config_server/__init__.py -------------------------------------------------------------------------------- /lightllm/server/config_server/api_http.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/config_server/api_http.py -------------------------------------------------------------------------------- /lightllm/server/config_server/nccl_tcp_store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/config_server/nccl_tcp_store.py -------------------------------------------------------------------------------- /lightllm/server/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/core/objs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/__init__.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/atomic_array_lock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/atomic_array_lock.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/atomic_lock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/atomic_lock.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/io_objs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/io_objs/__init__.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/io_objs/group_req.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/io_objs/group_req.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/nixl_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/nixl_params.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/out_token_circlequeue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/out_token_circlequeue.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/py_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/py_sampling_params.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/req.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/req.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/rpc_shm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/rpc_shm.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/sampling_params.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/shm_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/shm_array.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/shm_objs_io_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/shm_objs_io_buffer.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/shm_req_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/shm_req_manager.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/start_args_type.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/start_args_type.py -------------------------------------------------------------------------------- /lightllm/server/core/objs/token_chunck_hash_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/core/objs/token_chunck_hash_list.py -------------------------------------------------------------------------------- /lightllm/server/detokenization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/detokenization/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/decode.py -------------------------------------------------------------------------------- /lightllm/server/detokenization/decode_mode_fix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/decode_mode_fix.py -------------------------------------------------------------------------------- /lightllm/server/detokenization/decode_req.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/decode_req.py -------------------------------------------------------------------------------- /lightllm/server/detokenization/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/detokenization/manager.py -------------------------------------------------------------------------------- /lightllm/server/embed_cache/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/__init__.py -------------------------------------------------------------------------------- /lightllm/server/embed_cache/impl/__init__.py: -------------------------------------------------------------------------------- 1 | from . import naive_memory_cache -------------------------------------------------------------------------------- /lightllm/server/embed_cache/impl/naive_memory_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/impl/naive_memory_cache.py -------------------------------------------------------------------------------- /lightllm/server/embed_cache/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/manager.py -------------------------------------------------------------------------------- /lightllm/server/embed_cache/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/embed_cache/utils.py -------------------------------------------------------------------------------- /lightllm/server/function_call_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/function_call_parser.py -------------------------------------------------------------------------------- /lightllm/server/health_monitor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/health_monitor/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/health_monitor/manager.py -------------------------------------------------------------------------------- /lightllm/server/httpserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/httpserver/async_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver/async_queue.py -------------------------------------------------------------------------------- /lightllm/server/httpserver/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver/manager.py -------------------------------------------------------------------------------- /lightllm/server/httpserver/pd_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver/pd_loop.py -------------------------------------------------------------------------------- /lightllm/server/httpserver_for_pd_master/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/httpserver_for_pd_master/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/manager.py -------------------------------------------------------------------------------- /lightllm/server/httpserver_for_pd_master/pd_selector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/pd_selector/__init__.py -------------------------------------------------------------------------------- /lightllm/server/httpserver_for_pd_master/pd_selector/pd_selector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/pd_selector/pd_selector.py -------------------------------------------------------------------------------- /lightllm/server/httpserver_for_pd_master/register_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/httpserver_for_pd_master/register_loop.py -------------------------------------------------------------------------------- /lightllm/server/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/metrics/__init__.py -------------------------------------------------------------------------------- /lightllm/server/metrics/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/metrics/manager.py -------------------------------------------------------------------------------- /lightllm/server/metrics/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/metrics/metrics.py -------------------------------------------------------------------------------- /lightllm/server/multi_level_kv_cache/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/multi_level_kv_cache/cpu_cache_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/cpu_cache_client.py -------------------------------------------------------------------------------- /lightllm/server/multi_level_kv_cache/disk_cache_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/disk_cache_worker.py -------------------------------------------------------------------------------- /lightllm/server/multi_level_kv_cache/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/manager.py -------------------------------------------------------------------------------- /lightllm/server/multi_level_kv_cache/shm_objs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multi_level_kv_cache/shm_objs.py -------------------------------------------------------------------------------- /lightllm/server/multimodal_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/multimodal_params.py -------------------------------------------------------------------------------- /lightllm/server/pd_io_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/pd_io_struct.py -------------------------------------------------------------------------------- /lightllm/server/req_id_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/req_id_generator.py -------------------------------------------------------------------------------- /lightllm/server/router/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/batch.py -------------------------------------------------------------------------------- /lightllm/server/router/dynamic_prompt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/dynamic_prompt/radix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/dynamic_prompt/radix_cache.py -------------------------------------------------------------------------------- /lightllm/server/router/dynamic_prompt/shared_arr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/dynamic_prompt/shared_arr.py -------------------------------------------------------------------------------- /lightllm/server/router/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/manager.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/infer_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/infer_batch.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/__init__.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/base_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/base_backend.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/chunked_prefill/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/chunked_prefill/control_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/control_state.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_reward_model.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_token_healing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_token_healing.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/continues_batch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/p2p_fix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/p2p_fix.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/task_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/task_queue.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/continues_batch/pd_mode/utils.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/diverse_backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/diverse_backend/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/diverse_backend/impl.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/dp_backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/dp_backend/control_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/dp_backend/control_state.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/dp_backend/dp_shared_kv_trans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/dp_backend/dp_shared_kv_trans.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/generic_padded_pre_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/generic_padded_pre_process.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/generic_post_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/generic_post_process.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/generic_pre_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/generic_pre_process.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/mtp_pre_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/mtp_pre_process.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/multi_level_kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/multi_level_kv_cache.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/overlap_events.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/overlap_events.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/base_kv_move_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/base_kv_move_manager.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/__init__.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl_for_dp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/decode_impl_for_dp.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/up_status.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/decode_node_impl/up_status.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/nixl_kv_transporter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/nixl_kv_transporter.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/__init__.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/prefill_node_impl/prefill_impl.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pd_nixl/trans_process_obj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pd_nixl/trans_process_obj.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/pre.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/pre.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/mode_backend/redundancy_expert_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/mode_backend/redundancy_expert_manager.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/model_rpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/model_rpc.py -------------------------------------------------------------------------------- /lightllm/server/router/model_infer/pin_mem_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/model_infer/pin_mem_manager.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/__init__.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/base_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/base_queue.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/chunked_prefill/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/chunked_prefill/beam_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/beam_impl.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/chunked_prefill/impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/impl.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/chunked_prefill/impl_for_nixl_pd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/impl_for_nixl_pd.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/chunked_prefill/impl_for_pd_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/chunked_prefill/impl_for_pd_decode.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/dp_balancer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/__init__.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/dp_balancer/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/base.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/dp_balancer/bs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/bs.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/dp_balancer/roundrobin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_balancer/roundrobin.py -------------------------------------------------------------------------------- /lightllm/server/router/req_queue/dp_base_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/req_queue/dp_base_queue.py -------------------------------------------------------------------------------- /lightllm/server/router/stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/stats.py -------------------------------------------------------------------------------- /lightllm/server/router/token_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/router/token_load.py -------------------------------------------------------------------------------- /lightllm/server/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/tokenizer.py -------------------------------------------------------------------------------- /lightllm/server/visualserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/visualserver/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/visualserver/manager.py -------------------------------------------------------------------------------- /lightllm/server/visualserver/model_infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/server/visualserver/model_infer/model_rpc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/server/visualserver/model_infer/model_rpc.py -------------------------------------------------------------------------------- /lightllm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lightllm/utils/auto_shm_cleanup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/auto_shm_cleanup.py -------------------------------------------------------------------------------- /lightllm/utils/config_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/config_utils.py -------------------------------------------------------------------------------- /lightllm/utils/custom_kernel_utis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/custom_kernel_utis.py -------------------------------------------------------------------------------- /lightllm/utils/device_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/device_utils.py -------------------------------------------------------------------------------- /lightllm/utils/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/dist_utils.py -------------------------------------------------------------------------------- /lightllm/utils/envs_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/envs_utils.py -------------------------------------------------------------------------------- /lightllm/utils/error_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/error_utils.py -------------------------------------------------------------------------------- /lightllm/utils/graceful_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/graceful_utils.py -------------------------------------------------------------------------------- /lightllm/utils/health_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/health_check.py -------------------------------------------------------------------------------- /lightllm/utils/infer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/infer_utils.py -------------------------------------------------------------------------------- /lightllm/utils/kv_cache_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/kv_cache_utils.py -------------------------------------------------------------------------------- /lightllm/utils/light_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/light_utils.py -------------------------------------------------------------------------------- /lightllm/utils/llm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/llm_utils.py -------------------------------------------------------------------------------- /lightllm/utils/log_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/log_utils.py -------------------------------------------------------------------------------- /lightllm/utils/multimodal_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/multimodal_utils.py -------------------------------------------------------------------------------- /lightllm/utils/multinode_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/multinode_utils.py -------------------------------------------------------------------------------- /lightllm/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/net_utils.py -------------------------------------------------------------------------------- /lightllm/utils/petrel_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/petrel_helper.py -------------------------------------------------------------------------------- /lightllm/utils/process_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/process_check.py -------------------------------------------------------------------------------- /lightllm/utils/profile_max_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/profile_max_tokens.py -------------------------------------------------------------------------------- /lightllm/utils/retry_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/retry_utils.py -------------------------------------------------------------------------------- /lightllm/utils/rpyc_fix_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/rpyc_fix_utils.py -------------------------------------------------------------------------------- /lightllm/utils/sgl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/sgl_utils.py -------------------------------------------------------------------------------- /lightllm/utils/shm_size_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/shm_size_check.py -------------------------------------------------------------------------------- /lightllm/utils/shm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/shm_utils.py -------------------------------------------------------------------------------- /lightllm/utils/start_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/start_utils.py -------------------------------------------------------------------------------- /lightllm/utils/statics_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/statics_utils.py -------------------------------------------------------------------------------- /lightllm/utils/time_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/time_utils.py -------------------------------------------------------------------------------- /lightllm/utils/torch_ops_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/torch_ops_utils.py -------------------------------------------------------------------------------- /lightllm/utils/tuning_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/tuning_utils.py -------------------------------------------------------------------------------- /lightllm/utils/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/vllm_utils.py -------------------------------------------------------------------------------- /lightllm/utils/watchdog_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/lightllm/utils/watchdog_utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/setup.py -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_14b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_14b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_32b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_32b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_72b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen2.5_72b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_235b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_head/test_kv_cache_calib_per_head_qwen3_30b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_14b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_14b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_32b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_32b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_72b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen2.5_72b.json -------------------------------------------------------------------------------- /test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen3_30b.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/fp8_calibration_per_tensor/test_kv_cache_calib_per_tensor_qwen3_30b.json -------------------------------------------------------------------------------- /test/advanced_config/mixed_quantization/llamacls-mix-down.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/mixed_quantization/llamacls-mix-down.yaml -------------------------------------------------------------------------------- /test/advanced_config/redundancy_expert/test_redundancy_expert_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/advanced_config/redundancy_expert/test_redundancy_expert_config.json -------------------------------------------------------------------------------- /test/benchmark/kernel/benchmark_fused_moe_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/kernel/benchmark_fused_moe_triton.py -------------------------------------------------------------------------------- /test/benchmark/service/benchmark_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_client.py -------------------------------------------------------------------------------- /test/benchmark/service/benchmark_mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_mcq.py -------------------------------------------------------------------------------- /test/benchmark/service/benchmark_prompt_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_prompt_cache.py -------------------------------------------------------------------------------- /test/benchmark/service/benchmark_prompt_cache_multi_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_prompt_cache_multi_server.py -------------------------------------------------------------------------------- /test/benchmark/service/benchmark_qps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_qps.py -------------------------------------------------------------------------------- /test/benchmark/service/benchmark_sharegpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/service/benchmark_sharegpt.py -------------------------------------------------------------------------------- /test/benchmark/static_inference/model_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/model_infer.py -------------------------------------------------------------------------------- /test/benchmark/static_inference/model_infer_mtp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/model_infer_mtp.py -------------------------------------------------------------------------------- /test/benchmark/static_inference/profile_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/profile_demo.py -------------------------------------------------------------------------------- /test/benchmark/static_inference/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/test_model.py -------------------------------------------------------------------------------- /test/benchmark/static_inference/test_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/benchmark/static_inference/test_vit.py -------------------------------------------------------------------------------- /test/chat_template/tool_chat_template_deepseekr1.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekr1.jinja -------------------------------------------------------------------------------- /test/chat_template/tool_chat_template_deepseekv3.jinjia: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekv3.jinjia -------------------------------------------------------------------------------- /test/chat_template/tool_chat_template_deepseekv31.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekv31.jinja -------------------------------------------------------------------------------- /test/chat_template/tool_chat_template_deepseekv32.jinjia: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/chat_template/tool_chat_template_deepseekv32.jinjia -------------------------------------------------------------------------------- /test/compare_with_previous_commit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/compare_with_previous_commit.py -------------------------------------------------------------------------------- /test/format_out/gomoku_game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/gomoku_game.py -------------------------------------------------------------------------------- /test/format_out/qabot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/qabot.py -------------------------------------------------------------------------------- /test/format_out/test_constraint_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/test_constraint_server.py -------------------------------------------------------------------------------- /test/format_out/test_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/test_demo.py -------------------------------------------------------------------------------- /test/format_out/test_xgrammar_constraint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/format_out/test_xgrammar_constraint.py -------------------------------------------------------------------------------- /test/kernel/deepseekv2_bmm_scaled_fp8_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv2_bmm_scaled_fp8_tuning.py -------------------------------------------------------------------------------- /test/kernel/deepseekv2_gqa_decode_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv2_gqa_decode_tuning.py -------------------------------------------------------------------------------- /test/kernel/deepseekv3_fp8_block_gemm_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv3_fp8_block_gemm_tuning.py -------------------------------------------------------------------------------- /test/kernel/deepseekv3_rotary_emb_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/deepseekv3_rotary_emb_tuning.py -------------------------------------------------------------------------------- /test/kernel/fuse_moe_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/fuse_moe_tuning.py -------------------------------------------------------------------------------- /test/kernel/llama_gqa_decode_vsm_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/llama_gqa_decode_vsm_tuning.py -------------------------------------------------------------------------------- /test/kernel/llama_gqa_diverse_decode_stage1_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/llama_gqa_diverse_decode_stage1_tuning.py -------------------------------------------------------------------------------- /test/kernel/moe_silu_and_mul_tuning_bf16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/moe_silu_and_mul_tuning_bf16.py -------------------------------------------------------------------------------- /test/kernel/moe_sum_reduce_tuning_bf16.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/kernel/moe_sum_reduce_tuning_bf16.py -------------------------------------------------------------------------------- /test/start_scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/README.md -------------------------------------------------------------------------------- /test/start_scripts/draft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/draft.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_node_ep_node0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_ep_node0.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_node_ep_node1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_ep_node1.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_node_tp_node0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_tp_node0.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_node_tp_node1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_node_tp_node1.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_pd_master.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_pd_master/config_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/config_server.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_pd_master/pd_decode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_decode.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_pd_master/pd_master_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_master_1.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_pd_master/pd_master_2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_master_2.sh -------------------------------------------------------------------------------- /test/start_scripts/multi_pd_master/pd_prefill.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/multi_pd_master/pd_prefill.sh -------------------------------------------------------------------------------- /test/start_scripts/single_node_ep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_node_ep.sh -------------------------------------------------------------------------------- /test/start_scripts/single_node_tp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_node_tp.sh -------------------------------------------------------------------------------- /test/start_scripts/single_node_tp_cpu_cache_enable.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_node_tp_cpu_cache_enable.sh -------------------------------------------------------------------------------- /test/start_scripts/single_pd_master/pd_decode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_decode.sh -------------------------------------------------------------------------------- /test/start_scripts/single_pd_master/pd_master.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_master.sh -------------------------------------------------------------------------------- /test/start_scripts/single_pd_master/pd_nixl_decode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_nixl_decode.sh -------------------------------------------------------------------------------- /test/start_scripts/single_pd_master/pd_nixl_prefill.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_nixl_prefill.sh -------------------------------------------------------------------------------- /test/start_scripts/single_pd_master/pd_prefill.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/start_scripts/single_pd_master/pd_prefill.sh -------------------------------------------------------------------------------- /test/test_api/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test.jpg -------------------------------------------------------------------------------- /test/test_api/test_generate_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test_generate_api.py -------------------------------------------------------------------------------- /test/test_api/test_multimodal_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test_multimodal_api.py -------------------------------------------------------------------------------- /test/test_api/test_openai_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/test/test_api/test_openai_api.py -------------------------------------------------------------------------------- /tools/quick_launch_docker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/tools/quick_launch_docker.py -------------------------------------------------------------------------------- /tools/resolve_ptx_version: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/tools/resolve_ptx_version -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_add_in_place.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_add_in_place.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_gen_decode_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_decode_params.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_gen_mtp_prefill_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_mtp_prefill_params.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_gen_prefill_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_prefill_params.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_gen_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_gen_sampling_params.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_multimodal_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_multimodal_emb.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_redundancy_topk_ids_repair.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_redundancy_topk_ids_repair.py -------------------------------------------------------------------------------- /unit_tests/common/basemodel/triton_kernel/test_sp_pad_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/basemodel/triton_kernel/test_sp_pad_kernel.py -------------------------------------------------------------------------------- /unit_tests/common/fused_moe/test_deepep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_deepep.py -------------------------------------------------------------------------------- /unit_tests/common/fused_moe/test_grouped_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_grouped_fused_moe.py -------------------------------------------------------------------------------- /unit_tests/common/fused_moe/test_grouped_fused_moe_speed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_grouped_fused_moe_speed.py -------------------------------------------------------------------------------- /unit_tests/common/fused_moe/test_grouped_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_grouped_topk.py -------------------------------------------------------------------------------- /unit_tests/common/fused_moe/test_moe_silu_and_mul_mix_quant_ep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_moe_silu_and_mul_mix_quant_ep.py -------------------------------------------------------------------------------- /unit_tests/common/fused_moe/test_softmax_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/fused_moe/test_softmax_topk.py -------------------------------------------------------------------------------- /unit_tests/common/kv_trans_kernel/test_kv_trans_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/kv_trans_kernel/test_kv_trans_v2.py -------------------------------------------------------------------------------- /unit_tests/common/kv_trans_kernel/test_nixl_kv_trans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/kv_trans_kernel/test_nixl_kv_trans.py -------------------------------------------------------------------------------- /unit_tests/common/quantization/test_fp8_scaled_mm_per_token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/common/quantization/test_fp8_scaled_mm_per_token.py -------------------------------------------------------------------------------- /unit_tests/models/deepseek2/test_destindex_copy_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_destindex_copy_kv.py -------------------------------------------------------------------------------- /unit_tests/models/deepseek2/test_destindex_copy_kv_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_destindex_copy_kv_fp8.py -------------------------------------------------------------------------------- /unit_tests/models/deepseek2/test_gqa_flash_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_gqa_flash_decoding.py -------------------------------------------------------------------------------- /unit_tests/models/deepseek2/test_gqa_flash_decoding_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_gqa_flash_decoding_fp8.py -------------------------------------------------------------------------------- /unit_tests/models/deepseek2/test_repack_kv_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_repack_kv_index.py -------------------------------------------------------------------------------- /unit_tests/models/deepseek2/test_rope_repeat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/deepseek2/test_rope_repeat.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_context_flashattention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_context_flashattention_nopad.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_context_flashattention_nopad_fa3_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_context_flashattention_nopad_fa3_fp8.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_context_flashattention_nopad_flashinfer_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_context_flashattention_nopad_flashinfer_fp8.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage1.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage2.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_ppl_int8kv_flash_decoding_diverse_stage3.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_token_attention_nopad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_token_attention_nopad.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_token_attention_nopad_fa3_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_token_attention_nopad_fa3_fp8.py -------------------------------------------------------------------------------- /unit_tests/models/llama/test_token_attention_nopad_flashinfer_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/llama/test_token_attention_nopad_flashinfer_fp8.py -------------------------------------------------------------------------------- /unit_tests/models/qwen2_vl/test_mrope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/qwen2_vl/test_mrope.py -------------------------------------------------------------------------------- /unit_tests/models/qwen2_vl/test_rotary_pos_emb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/qwen2_vl/test_rotary_pos_emb.py -------------------------------------------------------------------------------- /unit_tests/models/vit/test_flash_attention_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/models/vit/test_flash_attention_forward.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_atomic_array_lock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_atomic_array_lock.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_atomic_lock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_atomic_lock.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_out_token_circlequeue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_out_token_circlequeue.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_req.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_req.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_sampling_params.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_shm_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_shm_array.py -------------------------------------------------------------------------------- /unit_tests/server/core/objs/test_shm_req_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/core/objs/test_shm_req_manager.py -------------------------------------------------------------------------------- /unit_tests/server/router/dynamic_prompt/test_radix_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/server/router/dynamic_prompt/test_radix_cache.py -------------------------------------------------------------------------------- /unit_tests/utils/test_custom_kernel_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelTC/LightLLM/HEAD/unit_tests/utils/test_custom_kernel_utils.py --------------------------------------------------------------------------------