├── .clang-format ├── .gitignore ├── .gitmodules ├── .pylintrc ├── 3rdparty ├── CMakeLists.txt └── LLM_kernels │ ├── .clang-format │ ├── .gitignore │ ├── 3rdparty │ ├── deepgemm │ │ └── deepgemm.patch │ └── ini_reader.h │ ├── CMakeLists.txt │ ├── cmake │ ├── ascend.cmake │ ├── base.cmake │ ├── flashinfer.cmake │ ├── fmt.cmake │ ├── module │ │ ├── CMakeCCECompiler.cmake.in │ │ ├── CMakeCCEFunction.cmake │ │ ├── CMakeCCEInformation.cmake │ │ ├── CMakeDetermineCCECompiler.cmake │ │ └── CMakeTestCCECompiler.cmake │ ├── nvidia.cmake │ ├── test.cmake │ └── yaml-cpp.cmake │ ├── csrc │ ├── CMakeLists.txt │ ├── kernels │ │ ├── CMakeLists.txt │ │ ├── ascend │ │ │ ├── CMakeLists.txt │ │ │ ├── atb_plugin_operations │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── acl_nn_operation.cc │ │ │ │ ├── acl_nn_operation.h │ │ │ │ ├── acl_nn_tensor.h │ │ │ │ ├── argmax_operation.cc │ │ │ │ ├── argmax_operation.h │ │ │ │ ├── cast_operation.cc │ │ │ │ ├── cast_operation.h │ │ │ │ ├── utils.cc │ │ │ │ └── utils.h │ │ │ └── attention │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── attention.cc │ │ │ │ └── attention.h │ │ └── nvidia │ │ │ ├── CMakeLists.txt │ │ │ ├── activation │ │ │ ├── CMakeLists.txt │ │ │ ├── activation.cu │ │ │ ├── activation.h │ │ │ ├── activation_test.cu │ │ │ └── activation_test.py │ │ │ ├── add │ │ │ ├── CMakeLists.txt │ │ │ ├── add.cu │ │ │ ├── add.h │ │ │ └── add_test.cu │ │ │ ├── add_mul │ │ │ ├── CMakeLists.txt │ │ │ ├── add_mul.cu │ │ │ ├── add_mul.h │ │ │ └── add_mul_test.cu │ │ │ ├── adjust_mem │ │ │ ├── CMakeLists.txt │ │ │ ├── adjust_mem.cu │ │ │ ├── adjust_mem.h │ │ │ ├── adjust_mem_test.cu │ │ │ └── adjust_mem_test.py │ │ │ ├── alibi │ │ │ ├── CMakeLists.txt │ │ │ ├── alibi.cu │ │ │ ├── alibi.h │ │ │ └── alibi_test.cu │ │ │ ├── all_reduce │ │ │ ├── CMakeLists.txt │ │ │ ├── custom_all_reduce.cu │ │ │ ├── custom_all_reduce.h │ │ │ └── custom_all_reduce_test.cu │ │ │ ├── assemble_tokens_hidden │ │ │ ├── CMakeLists.txt │ │ │ ├── assemble_tokens_hidden.cu │ │ │ ├── assemble_tokens_hidden.h │ │ │ └── assemble_tokens_hidden_test.cu │ │ │ ├── asymmetric_gemm │ │ │ ├── CMakeLists.txt │ │ │ ├── asymmetric_gemm_test.cu │ │ │ ├── asymmetric_gemm_wrapper.cu │ │ │ ├── asymmetric_gemm_wrapper.h │ │ │ ├── cutlass_heuristic.cpp │ │ │ ├── cutlass_heuristic.h │ │ │ ├── cutlass_preprocessors.cpp │ │ │ ├── cutlass_preprocessors.h │ │ │ ├── cutlass_preprocessors_fast.cu │ │ │ ├── cutlass_preprocessors_test.cu │ │ │ ├── cutlass_type_conversion.h │ │ │ ├── fp8_rowwise_gemm │ │ │ │ ├── fp8_rowwise_gemm.h │ │ │ │ ├── fp8_rowwise_gemm_bf16.cu │ │ │ │ ├── fp8_rowwise_gemm_fp16.cu │ │ │ │ ├── fp8_rowwise_gemm_kernel_template_sm90.h │ │ │ │ └── fp8_rowwise_gemm_template.h │ │ │ ├── fpA_intB_gemm │ │ │ │ ├── bf16_int4_gemm_fg_scalebias.cu │ │ │ │ ├── bf16_int4_gemm_fg_scaleonly.cu │ │ │ │ ├── bf16_int4_gemm_per_col.cu │ │ │ │ ├── bf16_int8_gemm_fg_scalebias.cu │ │ │ │ ├── bf16_int8_gemm_fg_scaleonly.cu │ │ │ │ ├── bf16_int8_gemm_per_col.cu │ │ │ │ ├── e4m3_int4_gemm_fg_scalebias_f16_out_f16.cu │ │ │ │ ├── e4m3_int4_gemm_fg_scaleonly_f16_out_f16.cu │ │ │ │ ├── e4m3_int4_gemm_per_col_f16_out_f16.cu │ │ │ │ ├── fp16_int4_gemm_fg_scalebias.cu │ │ │ │ ├── fp16_int4_gemm_fg_scaleonly.cu │ │ │ │ ├── fp16_int4_gemm_per_col.cu │ │ │ │ ├── fp16_int8_gemm_fg_scalebias.cu │ │ │ │ ├── fp16_int8_gemm_fg_scaleonly.cu │ │ │ │ ├── fp16_int8_gemm_per_col.cu │ │ │ │ ├── fpA_intB_gemm.h │ │ │ │ ├── fpA_intB_gemm_template.h │ │ │ │ ├── fpA_intB_gemm_template_sm90.h │ │ │ │ └── launchers │ │ │ │ │ ├── fpA_intB_launcher_sm90.h │ │ │ │ │ └── fpA_intB_launcher_sm90.inl │ │ │ ├── fused_gated_gemm │ │ │ │ ├── fused_gated_gemm.h │ │ │ │ ├── fused_gated_gemm_kernel_template_sm90.h │ │ │ │ ├── fused_gated_gemm_template.h │ │ │ │ └── gemm_swiglu_e4m3.cu │ │ │ ├── moe_gemm │ │ │ │ ├── launchers │ │ │ │ │ ├── fused_moe_gemm_launcher_sm80.h │ │ │ │ │ ├── fused_moe_gemm_launcher_sm80.inl │ │ │ │ │ ├── moe_gemm_launcher_sm90.h │ │ │ │ │ └── moe_gemm_launcher_sm90.inl │ │ │ │ ├── moe_gemm_hopper_input.cu │ │ │ │ ├── moe_gemm_kernels.h │ │ │ │ ├── moe_gemm_kernels_bf16_bf16.cu │ │ │ │ ├── moe_gemm_kernels_bf16_uint4.cu │ │ │ │ ├── moe_gemm_kernels_bf16_uint8.cu │ │ │ │ ├── moe_gemm_kernels_fp16_fp16.cu │ │ │ │ ├── moe_gemm_kernels_fp16_uint4.cu │ │ │ │ ├── moe_gemm_kernels_fp16_uint8.cu │ │ │ │ ├── moe_gemm_kernels_fp32_fp32.cu │ │ │ │ ├── moe_gemm_kernels_fp8_fp8.cu │ │ │ │ ├── moe_gemm_kernels_template.h │ │ │ │ ├── moe_gemm_kernels_template_sm90.h │ │ │ │ └── moe_sm90_traits.h │ │ │ └── python │ │ │ │ └── generate_kernels.py │ │ │ ├── blockwise_gemm │ │ │ ├── CMakeLists.txt │ │ │ ├── blockwise_gemm.cu │ │ │ ├── blockwise_gemm.cuh │ │ │ ├── blockwise_gemm.h │ │ │ ├── blockwise_gemm_test.cu │ │ │ ├── collective │ │ │ │ ├── collective_builder.hpp │ │ │ │ ├── fp8_accumulation.hpp │ │ │ │ └── sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp │ │ │ ├── cutlass_gemm_caller.cuh │ │ │ └── dispatch_policy.hpp │ │ │ ├── cast │ │ │ ├── CMakeLists.txt │ │ │ ├── cast.cu │ │ │ ├── cast.h │ │ │ └── cast_test.cu │ │ │ ├── common │ │ │ ├── reduce_kernel_utils.cuh │ │ │ └── vec_dtypes.cuh │ │ │ ├── concat │ │ │ ├── CMakeLists.txt │ │ │ ├── concat.cu │ │ │ ├── concat.h │ │ │ └── concat_test.cu │ │ │ ├── cutlass_extensions │ │ │ ├── arch │ │ │ │ ├── copy_red_global.hpp │ │ │ │ └── mma.h │ │ │ ├── compute_occupancy.h │ │ │ ├── epilogue │ │ │ │ ├── collective │ │ │ │ │ └── epilogue_moe_finalize.hpp │ │ │ │ ├── thread │ │ │ │ │ └── fused_activations.h │ │ │ │ └── threadblock │ │ │ │ │ ├── epilogue_per_row_per_col_scale.h │ │ │ │ │ └── epilogue_tensor_op_int32.h │ │ │ ├── epilogue_helpers.h │ │ │ ├── gemm │ │ │ │ ├── collective │ │ │ │ │ ├── builders │ │ │ │ │ │ └── sm90_gmma_builder_gated.inl │ │ │ │ │ ├── collective_builder_gated.hpp │ │ │ │ │ ├── collective_mma_gated.hpp │ │ │ │ │ ├── sm90_mma_gated_tma_gmma_ss_warpspecialized.hpp │ │ │ │ │ └── sm90_mma_gated_tma_gmma_ss_warpspecialized_fp8.hpp │ │ │ │ ├── device │ │ │ │ │ ├── gemm_universal_base_compat.h │ │ │ │ │ └── splitk_gemm_grouped.h │ │ │ │ ├── kernel │ │ │ │ │ ├── cuda_hint.cuh │ │ │ │ │ ├── default_fpA_intB_traits.h │ │ │ │ │ ├── default_int8_traits.h │ │ │ │ │ ├── default_splitk_gemm_grouped.h │ │ │ │ │ ├── fpA_intB_gemm.h │ │ │ │ │ ├── fused_moe_kernel.cuh │ │ │ │ │ ├── fused_moe_kernel_routine.cuh │ │ │ │ │ ├── fused_moe_kernel_traits.cuh │ │ │ │ │ ├── gemm_moe_problem_visitor.h │ │ │ │ │ ├── gemm_universal_gated.hpp │ │ │ │ │ ├── gemm_with_epilogue_visitor.h │ │ │ │ │ ├── mixed_gemm_B_layout.h │ │ │ │ │ ├── moe_cute_util.cuh │ │ │ │ │ ├── moe_cutlass_kernel.h │ │ │ │ │ ├── moe_problem_visitor.h │ │ │ │ │ ├── sm90_gemm_gated_tma_warpspecialized_cooperative.hpp │ │ │ │ │ ├── sm90_gemm_gated_tma_warpspecialized_pingpong.hpp │ │ │ │ │ └── splitk_gemm_grouped.h │ │ │ │ ├── threadblock │ │ │ │ │ ├── default_dq_mma.h │ │ │ │ │ ├── default_dq_mma_multistage.h │ │ │ │ │ ├── default_dq_mma_pipelined.h │ │ │ │ │ ├── default_mma.h │ │ │ │ │ ├── default_mma_bf16.h │ │ │ │ │ ├── dq_mma_base.h │ │ │ │ │ ├── dq_mma_multistage.h │ │ │ │ │ ├── dq_mma_multistage_finegrained.h │ │ │ │ │ ├── dq_mma_multistage_percol.h │ │ │ │ │ ├── dq_mma_pipelined.h │ │ │ │ │ ├── dq_mma_pipelined_finegrained.h │ │ │ │ │ └── dq_mma_pipelined_percol.h │ │ │ │ └── warp │ │ │ │ │ ├── default_mma_tensor_op.h │ │ │ │ │ ├── mma_tensorop_compute_B_with_f16.h │ │ │ │ │ └── mma_tensorop_dequantizer.h │ │ │ ├── gemm_configs.h │ │ │ ├── interleaved_numeric_conversion.h │ │ │ ├── tile_interleaved_layout.h │ │ │ ├── transform │ │ │ │ └── threadblock │ │ │ │ │ └── fine_grained_scale_zero_iterator.h │ │ │ ├── util │ │ │ │ └── gather_tensor.hpp │ │ │ └── weight_only_quant_op.h │ │ │ ├── embedding │ │ │ ├── CMakeLists.txt │ │ │ ├── embedding.cu │ │ │ ├── embedding.h │ │ │ └── embedding_test.cu │ │ │ ├── expand │ │ │ ├── CMakeLists.txt │ │ │ ├── expand.cu │ │ │ ├── expand.h │ │ │ └── expand_test.cu │ │ │ ├── flash_mla │ │ │ ├── CMakeLists.txt │ │ │ ├── flash_mla.cu │ │ │ ├── flash_mla.h │ │ │ ├── flash_mla_test.cu │ │ │ └── kernels │ │ │ │ ├── config.h │ │ │ │ ├── fp8_flash_fwd_mla.h │ │ │ │ ├── fp8_flash_fwd_mla_kernel.h │ │ │ │ ├── fp8_flash_fwd_mla_scalar_bf16_cache_e4m3_sm90.cu │ │ │ │ ├── fp8_flash_fwd_mla_scalar_fp16_cache_e4m3_sm90.cu │ │ │ │ ├── fp8_named_barrier.h │ │ │ │ ├── fp8_softmax.h │ │ │ │ ├── fp8_transpose_v.h │ │ │ │ ├── fp8_utils.h │ │ │ │ ├── get_mla_metadata.cu │ │ │ │ ├── get_mla_metadata.h │ │ │ │ ├── mla_combine.cu │ │ │ │ ├── mla_combine.h │ │ │ │ ├── params.h │ │ │ │ ├── splitkv_mla.cu │ │ │ │ ├── splitkv_mla.h │ │ │ │ ├── traits.h │ │ │ │ └── utils.h │ │ │ ├── fused_add_norm │ │ │ ├── CMakeLists.txt │ │ │ ├── fused_add_norm.cu │ │ │ ├── fused_add_norm.h │ │ │ ├── fused_add_norm_test.cu │ │ │ └── fused_add_norm_test.py │ │ │ ├── gemm │ │ │ ├── CMakeLists.txt │ │ │ └── deepgemm │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── deepgemm_wrapper.cu │ │ │ │ └── deepgemm_wrapper.h │ │ │ ├── gemm_wrapper │ │ │ ├── CMakeLists.txt │ │ │ ├── gemm_algo_map.cu │ │ │ ├── gemm_algo_map.h │ │ │ ├── gemm_algo_map_test.cu │ │ │ ├── gemm_wrapper.cu │ │ │ ├── gemm_wrapper.h │ │ │ └── gemm_wrapper_test.cu │ │ │ ├── gptq_marlin │ │ │ ├── CMakeLists.txt │ │ │ ├── awq_marlin_repack.cu │ │ │ ├── dequant.h │ │ │ ├── gptq_marlin.cu │ │ │ ├── gptq_marlin_repack.cu │ │ │ ├── kernel_bf16_kfe2m1f.cu │ │ │ ├── kernel_bf16_kfe4m3fn.cu │ │ │ ├── kernel_bf16_ku4.cu │ │ │ ├── kernel_bf16_ku4b8.cu │ │ │ ├── kernel_bf16_ku8b128.cu │ │ │ ├── kernel_fp16_kfe2m1f.cu │ │ │ ├── kernel_fp16_kfe4m3fn.cu │ │ │ ├── kernel_fp16_ku4.cu │ │ │ ├── kernel_fp16_ku4b8.cu │ │ │ ├── kernel_fp16_ku8b128.cu │ │ │ ├── marlin.cuh │ │ │ ├── marlin_dtypes.cuh │ │ │ ├── marlin_template.h │ │ │ ├── marlin_wrapper.h │ │ │ └── marlin_wrapper_test.cu │ │ │ ├── grouped_topk │ │ │ ├── CMakeLists.txt │ │ │ ├── grouped_topk.cu │ │ │ ├── grouped_topk.h │ │ │ └── grouped_topk_test.cu │ │ │ ├── identity │ │ │ ├── CMakeLists.txt │ │ │ ├── identity.cu │ │ │ ├── identity.h │ │ │ └── identity_test.cu │ │ │ ├── layernorm │ │ │ ├── CMakeLists.txt │ │ │ ├── layernorm.cu │ │ │ ├── layernorm.h │ │ │ ├── layernorm_test.cu │ │ │ └── layernorm_test.py │ │ │ ├── logits_topk │ │ │ ├── CMakeLists.txt │ │ │ ├── logits_topk.cu │ │ │ ├── logits_topk.h │ │ │ └── logits_topk_test.cu │ │ │ ├── machete │ │ │ ├── CMakeLists.txt │ │ │ ├── cutlass_extensions │ │ │ │ ├── common.cpp │ │ │ │ ├── common.hpp │ │ │ │ ├── cute_utils.cuh │ │ │ │ ├── epilogue │ │ │ │ │ ├── broadcast_load_epilogue_c2x.hpp │ │ │ │ │ ├── broadcast_load_epilogue_c3x.hpp │ │ │ │ │ ├── scaled_mm_epilogues_c2x.hpp │ │ │ │ │ └── scaled_mm_epilogues_c3x.hpp │ │ │ │ ├── gemm │ │ │ │ │ ├── collective │ │ │ │ │ │ ├── collective_builder.hpp │ │ │ │ │ │ ├── fp8_accumulation.hpp │ │ │ │ │ │ └── sm90_mma_tma_gmma_ss_warpspecialized_fp8_blockwise_scaling.hpp │ │ │ │ │ └── dispatch_policy.hpp │ │ │ │ ├── torch_utils.hpp │ │ │ │ ├── vllm_collective_builder.cuh │ │ │ │ ├── vllm_custom_types.cuh │ │ │ │ ├── vllm_numeric_conversion.cuh │ │ │ │ └── vllm_type_utils.cuh │ │ │ ├── generate.py │ │ │ ├── machete_collective_builder.cuh │ │ │ ├── machete_interleaving_utils.cuh │ │ │ ├── machete_mainloop.cuh │ │ │ ├── machete_mm_kernel.cuh │ │ │ ├── machete_mm_launcher.cuh │ │ │ ├── machete_prepack_kernel.cuh │ │ │ ├── machete_prepack_launcher.cuh │ │ │ ├── machete_prepacked_layout.cuh │ │ │ ├── machete_wrapper.cu │ │ │ ├── machete_wrapper.h │ │ │ └── machete_wrapper_test.cu │ │ │ ├── marlin_moe │ │ │ ├── CMakeLists.txt │ │ │ ├── fused_marlin_moe.cu │ │ │ ├── fused_marlin_moe.h │ │ │ ├── marlin_kernels │ │ │ │ ├── marlin_moe_kernel.h │ │ │ │ ├── marlin_moe_kernel_ku4.cu │ │ │ │ ├── marlin_moe_kernel_ku4.h │ │ │ │ ├── marlin_moe_kernel_ku4b8.cu │ │ │ │ ├── marlin_moe_kernel_ku4b8.h │ │ │ │ ├── marlin_moe_kernel_ku8b128.cu │ │ │ │ └── marlin_moe_kernel_ku8b128.h │ │ │ ├── marlin_moe_ops.cu │ │ │ ├── marlin_moe_ops.h │ │ │ ├── moe_align_sum_kernels.cu │ │ │ └── moe_align_sum_kernels.h │ │ │ ├── mixture_of_experts │ │ │ ├── CMakeLists.txt │ │ │ ├── moe_kernels.cu │ │ │ ├── moe_kernels.h │ │ │ ├── moe_norm_config.h │ │ │ ├── moe_test.cu │ │ │ ├── moe_wrapper.cu │ │ │ └── moe_wrapper.h │ │ │ ├── moe │ │ │ ├── CMakeLists.txt │ │ │ ├── common.py │ │ │ ├── cutlass_moe │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── cutlass_moe_wrapper.cpp │ │ │ │ └── cutlass_moe_wrapper.h │ │ │ ├── expert_map │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── expert_map.cu │ │ │ │ ├── expert_map.h │ │ │ │ └── expert_map_test.cu │ │ │ ├── fused_moe │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── best_config.json │ │ │ │ ├── fused_moe.py │ │ │ │ ├── fused_moe_creator.sh │ │ │ │ ├── fused_moe_test.cpp │ │ │ │ └── readme.md │ │ │ ├── fused_moe_gptq_int4_fp8_kernel │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── dequant.cu │ │ │ │ ├── dequant.h │ │ │ │ ├── dequant_test.cu │ │ │ │ ├── fused_moe_gptq_int4_fp8_kernel.py │ │ │ │ ├── fused_moe_gptq_int4_fp8_kernel_creator.sh │ │ │ │ ├── per_tensor_quant_by_scale.cu │ │ │ │ ├── per_tensor_quant_by_scale.h │ │ │ │ └── per_tensor_quant_by_scale_test.cu │ │ │ ├── fused_moe_kernel_gptq_awq │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── fused_moe_kernel_gptq_awq.py │ │ │ │ └── fused_moe_kernel_gptq_awq_creator.sh │ │ │ └── moe_wna16 │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── moe_wna16.cu │ │ │ │ ├── moe_wna16.h │ │ │ │ └── moe_wna16_utils.h │ │ │ ├── moe_utils │ │ │ ├── CMakeLists.txt │ │ │ ├── moe_utils.cu │ │ │ ├── moe_utils.h │ │ │ └── moe_utils_test.cu │ │ │ ├── others │ │ │ ├── CMakeLists.txt │ │ │ ├── README_cn.md │ │ │ ├── sglang │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ └── main │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── elementwise │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── concat_mla.cu │ │ │ │ │ ├── concat_mla.h │ │ │ │ │ └── concat_mla_test.cu │ │ │ │ │ └── quantization │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ └── fp8 │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── per_token_group_quant.cu │ │ │ │ │ ├── per_token_group_quant.h │ │ │ │ │ └── per_token_group_quant_test.cu │ │ │ ├── tensorrt-llm │ │ │ │ ├── CMakeLists.txt │ │ │ │ ├── README.md │ │ │ │ ├── dev │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── common │ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ │ ├── NvInferDataType.h │ │ │ │ │ │ ├── algorithm.h │ │ │ │ │ │ ├── cublasMMWrapper.cpp │ │ │ │ │ │ ├── cublasMMWrapper.h │ │ │ │ │ │ ├── cublasVersionCheck.h │ │ │ │ │ │ ├── cudaBf16Fallbacks.cuh │ │ │ │ │ │ ├── cudaBf16Wrapper.h │ │ │ │ │ │ ├── cudaDriverWrapper.cpp │ │ │ │ │ │ ├── cudaDriverWrapper.h │ │ │ │ │ │ ├── cudaFp8Utils.cu │ │ │ │ │ │ ├── cudaFp8Utils.h │ │ │ │ │ │ ├── cudaTypeUtils.cuh │ │ │ │ │ │ ├── cudaUtils.h │ │ │ │ │ │ ├── dataType.h │ │ │ │ │ │ ├── envUtils.cpp │ │ │ │ │ │ ├── envUtils.h │ │ │ │ │ │ ├── logger.cpp │ │ │ │ │ │ ├── logger.h │ │ │ │ │ │ ├── memoryUtils.cu │ │ │ │ │ │ ├── memoryUtils.h │ │ │ │ │ │ ├── quantTypeUtils.cuh │ │ │ │ │ │ ├── quantization.h │ │ │ │ │ │ ├── reduceKernelUtils.cuh │ │ │ │ │ │ ├── stringUtils.cpp │ │ │ │ │ │ ├── stringUtils.h │ │ │ │ │ │ ├── tllmException.cpp │ │ │ │ │ │ ├── tllmException.h │ │ │ │ │ │ ├── utils.h │ │ │ │ │ │ └── workspace.h │ │ │ │ │ ├── cutlass_extensions │ │ │ │ │ │ ├── arch │ │ │ │ │ │ │ ├── copy_red_global.hpp │ │ │ │ │ │ │ ├── copy_sm90_multimem.hpp │ │ │ │ │ │ │ ├── copy_traits_sm90_multimem.hpp │ │ │ │ │ │ │ ├── grid_dependency_control.h │ │ │ │ │ │ │ └── mma.h │ │ │ │ │ │ ├── compute_occupancy.h │ │ │ │ │ │ ├── detail │ │ │ │ │ │ │ └── collective │ │ │ │ │ │ │ │ └── mixed_input_utils.hpp │ │ │ │ │ │ ├── epilogue │ │ │ │ │ │ │ ├── fusion │ │ │ │ │ │ │ │ └── sm90_visitor_scatter.hpp │ │ │ │ │ │ │ ├── thread │ │ │ │ │ │ │ │ └── fused_activations.h │ │ │ │ │ │ │ └── threadblock │ │ │ │ │ │ │ │ ├── epilogue_per_row_per_col_scale.h │ │ │ │ │ │ │ │ └── epilogue_tensor_op_int32.h │ │ │ │ │ │ ├── epilogue_helpers.h │ │ │ │ │ │ ├── gemm │ │ │ │ │ │ │ ├── collective │ │ │ │ │ │ │ │ ├── builders │ │ │ │ │ │ │ │ │ ├── sm90_gmma_builder_gated.inl │ │ │ │ │ │ │ │ │ ├── sm90_gmma_builder_interleaved.inl │ │ │ │ │ │ │ │ │ └── sm90_gmma_builder_mixed_input.inl │ │ │ │ │ │ │ │ ├── collective_builder_gated.hpp │ │ │ │ │ │ │ │ ├── collective_builder_interleaved.hpp │ │ │ │ │ │ │ │ ├── collective_builder_mixed_input.hpp │ │ │ │ │ │ │ │ ├── collective_mma_array_mixed_input.hpp │ │ │ │ │ │ │ │ ├── collective_mma_gated.hpp │ │ │ │ │ │ │ │ ├── collective_mma_interleaved.hpp │ │ │ │ │ │ │ │ ├── sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp │ │ │ │ │ │ │ │ ├── sm90_mma_gated_tma_gmma_ss_warpspecialized.hpp │ │ │ │ │ │ │ │ ├── sm90_mma_gated_tma_gmma_ss_warpspecialized_fp8.hpp │ │ │ │ │ │ │ │ └── sm90_mma_interleaved_tma_gmma_rs_warpspecialized_mixed_input.hpp │ │ │ │ │ │ │ ├── device │ │ │ │ │ │ │ │ ├── gemm_universal_base_compat.h │ │ │ │ │ │ │ │ └── splitk_gemm_grouped.h │ │ │ │ │ │ │ ├── kernel │ │ │ │ │ │ │ │ ├── default_fpA_intB_traits.h │ │ │ │ │ │ │ │ ├── default_int8_traits.h │ │ │ │ │ │ │ │ ├── default_splitk_gemm_grouped.h │ │ │ │ │ │ │ │ ├── fpA_intB_gemm.h │ │ │ │ │ │ │ │ ├── fused_moe_kernel.cuh │ │ │ │ │ │ │ │ ├── fused_moe_kernel_routine.cuh │ │ │ │ │ │ │ │ ├── fused_moe_kernel_traits.cuh │ │ │ │ │ │ │ │ ├── gemm_moe_problem_visitor.h │ │ │ │ │ │ │ │ ├── gemm_universal_gated.hpp │ │ │ │ │ │ │ │ ├── gemm_with_epilogue_visitor.h │ │ │ │ │ │ │ │ ├── mixed_gemm_B_layout.h │ │ │ │ │ │ │ │ ├── moe_cute_util.cuh │ │ │ │ │ │ │ │ ├── moe_cutlass_kernel.h │ │ │ │ │ │ │ │ ├── moe_problem_visitor.h │ │ │ │ │ │ │ │ ├── sm90_gemm_gated_tma_warpspecialized_cooperative.hpp │ │ │ │ │ │ │ │ ├── sm90_gemm_gated_tma_warpspecialized_pingpong.hpp │ │ │ │ │ │ │ │ └── splitk_gemm_grouped.h │ │ │ │ │ │ │ ├── threadblock │ │ │ │ │ │ │ │ ├── default_dq_mma.h │ │ │ │ │ │ │ │ ├── default_dq_mma_multistage.h │ │ │ │ │ │ │ │ ├── default_dq_mma_pipelined.h │ │ │ │ │ │ │ │ ├── default_mma.h │ │ │ │ │ │ │ │ ├── default_mma_bf16.h │ │ │ │ │ │ │ │ ├── dq_mma_base.h │ │ │ │ │ │ │ │ ├── dq_mma_multistage.h │ │ │ │ │ │ │ │ ├── dq_mma_multistage_finegrained.h │ │ │ │ │ │ │ │ ├── dq_mma_multistage_percol.h │ │ │ │ │ │ │ │ ├── dq_mma_pipelined.h │ │ │ │ │ │ │ │ ├── dq_mma_pipelined_finegrained.h │ │ │ │ │ │ │ │ └── dq_mma_pipelined_percol.h │ │ │ │ │ │ │ └── warp │ │ │ │ │ │ │ │ ├── default_mma_tensor_op.h │ │ │ │ │ │ │ │ ├── mma_tensorop_compute_B_with_f16.h │ │ │ │ │ │ │ │ └── mma_tensorop_dequantizer.h │ │ │ │ │ │ ├── gemm_configs.h │ │ │ │ │ │ ├── interleaved_numeric_conversion.h │ │ │ │ │ │ ├── system_barrier.h │ │ │ │ │ │ ├── tile_interleaved_layout.h │ │ │ │ │ │ ├── transform │ │ │ │ │ │ │ └── threadblock │ │ │ │ │ │ │ │ └── fine_grained_scale_zero_iterator.h │ │ │ │ │ │ ├── util │ │ │ │ │ │ │ └── gather_tensor.hpp │ │ │ │ │ │ └── weight_only_quant_op.h │ │ │ │ │ ├── cutlass_kernels │ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ │ ├── cutlass_heuristic.cpp │ │ │ │ │ │ ├── cutlass_heuristic.h │ │ │ │ │ │ ├── cutlass_type_conversion.h │ │ │ │ │ │ ├── fp8_blockscale_gemm │ │ │ │ │ │ │ ├── ada_blockwise_gemm │ │ │ │ │ │ │ │ ├── sm89_fp8_gemm_1d1d.cuh │ │ │ │ │ │ │ │ └── sm89_utils.cuh │ │ │ │ │ │ │ ├── fp8_blockscale_gemm.cu │ │ │ │ │ │ │ ├── fp8_blockscale_gemm.h │ │ │ │ │ │ │ ├── fp8_blockscale_gemm_kernel.cuh │ │ │ │ │ │ │ ├── fp8_blockscale_mma_utils.cuh │ │ │ │ │ │ │ └── fp8_blockscale_tma_utils.cuh │ │ │ │ │ │ ├── fpA_intB_gemm │ │ │ │ │ │ │ ├── bf16_int4_gemm_fg_scalebias.cu │ │ │ │ │ │ │ ├── bf16_int4_gemm_fg_scaleonly.cu │ │ │ │ │ │ │ ├── bf16_int4_gemm_per_col.cu │ │ │ │ │ │ │ ├── bf16_int8_gemm_fg_scalebias.cu │ │ │ │ │ │ │ ├── bf16_int8_gemm_fg_scaleonly.cu │ │ │ │ │ │ │ ├── bf16_int8_gemm_per_col.cu │ │ │ │ │ │ │ ├── e4m3_int4_gemm_fg_scalebias_bf16_out_bf16.cu │ │ │ │ │ │ │ ├── e4m3_int4_gemm_fg_scalebias_f16_out_f16.cu │ │ │ │ │ │ │ ├── e4m3_int4_gemm_fg_scaleonly_bf16_out_bf16.cu │ │ │ │ │ │ │ ├── e4m3_int4_gemm_fg_scaleonly_f16_out_f16.cu │ │ │ │ │ │ │ ├── e4m3_int4_gemm_per_col_f16_out_f16.cu │ │ │ │ │ │ │ ├── fp16_int4_gemm_fg_scalebias.cu │ │ │ │ │ │ │ ├── fp16_int4_gemm_fg_scaleonly.cu │ │ │ │ │ │ │ ├── fp16_int4_gemm_per_col.cu │ │ │ │ │ │ │ ├── fp16_int8_gemm_fg_scalebias.cu │ │ │ │ │ │ │ ├── fp16_int8_gemm_fg_scaleonly.cu │ │ │ │ │ │ │ ├── fp16_int8_gemm_per_col.cu │ │ │ │ │ │ │ ├── fpA_intB_gemm.h │ │ │ │ │ │ │ ├── fpA_intB_gemm_template.h │ │ │ │ │ │ │ ├── fpA_intB_gemm_template_sm90.h │ │ │ │ │ │ │ └── launchers │ │ │ │ │ │ │ │ ├── fpA_intB_launcher_sm90.h │ │ │ │ │ │ │ │ └── fpA_intB_launcher_sm90.inl │ │ │ │ │ │ ├── include │ │ │ │ │ │ │ ├── common.h │ │ │ │ │ │ │ ├── cutlass_kernel_selector.h │ │ │ │ │ │ │ ├── moe_gemm_kernels.h │ │ │ │ │ │ │ ├── moe_kernels.h │ │ │ │ │ │ │ └── moe_util_kernels.h │ │ │ │ │ │ ├── moe_gemm │ │ │ │ │ │ │ ├── launchers │ │ │ │ │ │ │ │ ├── fused_moe_gemm_launcher_sm80.h │ │ │ │ │ │ │ │ ├── fused_moe_gemm_launcher_sm80.inl │ │ │ │ │ │ │ │ ├── moe_gemm_tma_ws_launcher.h │ │ │ │ │ │ │ │ ├── moe_gemm_tma_ws_launcher.inl │ │ │ │ │ │ │ │ ├── moe_gemm_tma_ws_mixed_input_launcher.h │ │ │ │ │ │ │ │ └── moe_gemm_tma_ws_mixed_input_launcher.inl │ │ │ │ │ │ │ ├── moe_gemm_kernels_bf16_bf16.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_bf16_fp4.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_bf16_fp8.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_bf16_uint4.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_bf16_uint8.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp16_fp16.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp16_fp4.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp16_uint4.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp16_uint8.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp32_fp32.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp4_fp4.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp8_fp4.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp8_fp8.cu │ │ │ │ │ │ │ ├── moe_gemm_kernels_fp8_uint4.cu │ │ │ │ │ │ │ ├── moe_gemm_template_dispatch.h │ │ │ │ │ │ │ ├── moe_gemm_template_dispatch_tma_ws.h │ │ │ │ │ │ │ ├── moe_gemm_template_dispatch_tma_ws_mixed_dtype.h │ │ │ │ │ │ │ ├── moe_gemm_tma_warp_specialized_input.cu │ │ │ │ │ │ │ ├── moe_kernels.cu │ │ │ │ │ │ │ └── moe_tma_warp_specialized_traits.h │ │ │ │ │ │ └── python │ │ │ │ │ │ │ └── generate_kernels.py │ │ │ │ │ ├── deep_gemm │ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ │ ├── compiler.cuh │ │ │ │ │ │ ├── deepgemm_test.cu │ │ │ │ │ │ ├── fp8_gemm.cuh │ │ │ │ │ │ ├── fp8_gemm_impl.cuh │ │ │ │ │ │ ├── jit_utils.cuh │ │ │ │ │ │ ├── mma_utils.cuh │ │ │ │ │ │ ├── nvrtc_cutlass.cuh │ │ │ │ │ │ ├── nvrtc_std.cuh │ │ │ │ │ │ ├── runtime.cuh │ │ │ │ │ │ ├── scheduler.cuh │ │ │ │ │ │ ├── tma_utils.cuh │ │ │ │ │ │ └── utils.cuh │ │ │ │ │ ├── thop │ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ │ ├── finegrained_mixed_dtype_gemm_thop.cpp │ │ │ │ │ │ ├── finegrained_mixed_dtype_gemm_thop.h │ │ │ │ │ │ ├── finegrained_mixed_dtype_gemm_thop_test.cu │ │ │ │ │ │ ├── moeOp.cpp │ │ │ │ │ │ ├── moeOp.h │ │ │ │ │ │ ├── moeOp_test.cu │ │ │ │ │ │ ├── torch_utils.h │ │ │ │ │ │ └── utils.h │ │ │ │ │ └── utils │ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ │ ├── preQuantScaleKernel.cu │ │ │ │ │ │ ├── preQuantScaleKernel.h │ │ │ │ │ │ ├── quantization.cu │ │ │ │ │ │ ├── quantization.cuh │ │ │ │ │ │ └── quantization.h │ │ │ │ └── main │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ └── communication_kernels │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── trtllm_all_reduce.cu │ │ │ │ │ ├── trtllm_all_reduce.h │ │ │ │ │ └── trtllm_all_reduce_test.cu │ │ │ └── vllm │ │ │ │ └── README.md │ │ │ ├── paged_attention │ │ │ ├── CMakeLists.txt │ │ │ ├── cache_copy.cu │ │ │ ├── cache_copy.h │ │ │ ├── cache_copy_flash_attn_layout.cu │ │ │ ├── cache_copy_flash_attn_layout.h │ │ │ ├── dtype_bfloat16.cuh │ │ │ ├── dtype_float16.cuh │ │ │ ├── dtype_float32.cuh │ │ │ ├── dtype_fp8.cuh │ │ │ ├── mla_cache_copy.cu │ │ │ ├── mla_cache_copy.h │ │ │ ├── mla_cache_copy_test.cu │ │ │ ├── numerous_tensor.h │ │ │ ├── paged_attention.cu │ │ │ ├── paged_attention.h │ │ │ ├── paged_attention_dtypes.h │ │ │ ├── paged_attention_generic.cuh │ │ │ ├── paged_attention_scalar_bf16_cache_auto.cu │ │ │ ├── paged_attention_scalar_bf16_cache_e4m3.cu │ │ │ ├── paged_attention_scalar_bf16_cache_e5m2.cu │ │ │ ├── paged_attention_scalar_fp16_cache_auto.cu │ │ │ ├── paged_attention_scalar_fp16_cache_e4m3.cu │ │ │ ├── paged_attention_scalar_fp16_cache_e5m2.cu │ │ │ ├── paged_attention_scalar_fp32_cache_auto.cu │ │ │ ├── paged_attention_scalar_fp32_cache_e4m3.cu │ │ │ ├── paged_attention_scalar_fp32_cache_e5m2.cu │ │ │ ├── paged_attention_test.cu │ │ │ ├── paged_attention_utils.cuh │ │ │ └── quant_utils.cuh │ │ │ ├── permute │ │ │ ├── CMakeLists.txt │ │ │ ├── nd_index_offset_helper.h │ │ │ ├── permute.cu │ │ │ ├── permute.h │ │ │ └── permute_test.cu │ │ │ ├── rotary_embedding │ │ │ ├── CMakeLists.txt │ │ │ ├── rotary_embedding.cu │ │ │ ├── rotary_embedding.h │ │ │ └── rotary_embedding_test.cu │ │ │ ├── samplers │ │ │ ├── CMakeLists.txt │ │ │ ├── apply_token_bitmask_inplace.cu │ │ │ ├── apply_token_bitmask_inplace.h │ │ │ ├── copy_elements.cu │ │ │ ├── copy_elements.cuh │ │ │ ├── decoding_common.cu │ │ │ ├── decoding_common.h │ │ │ ├── greedy.cu │ │ │ ├── greedy.h │ │ │ ├── repetition_penalty.cu │ │ │ ├── repetition_penalty.h │ │ │ ├── samplers_test.cu │ │ │ ├── sampling_topk_kernels.cu │ │ │ └── sampling_topk_kernels.h │ │ │ ├── split │ │ │ ├── CMakeLists.txt │ │ │ ├── split.cu │ │ │ ├── split.h │ │ │ ├── split_test.cu │ │ │ └── split_test.py │ │ │ ├── weight_only_batched_gemv │ │ │ ├── CMakeLists.txt │ │ │ ├── common.h │ │ │ ├── converter.h │ │ │ ├── details.h │ │ │ ├── kernel.h │ │ │ ├── kernelDispatcher.h │ │ │ ├── kernelDispatcherBf16Int4GroupwiseColumnMajorFalse.cu │ │ │ ├── kernelDispatcherBf16Int4GroupwiseColumnMajorInterleavedTrue.cu │ │ │ ├── kernelDispatcherBf16Int4PerChannelColumnMajorFalse.cu │ │ │ ├── kernelDispatcherBf16Int4PerChannelColumnMajorInterleavedTrue.cu │ │ │ ├── kernelDispatcherBf16Int8PerChannelColumnMajorFalse.cu │ │ │ ├── kernelDispatcherBf16Int8PerChannelColumnMajorInterleavedTrue.cu │ │ │ ├── kernelDispatcherFp16Int4GroupwiseColumnMajorFalse.cu │ │ │ ├── kernelDispatcherFp16Int4GroupwiseColumnMajorInterleavedTrue.cu │ │ │ ├── kernelDispatcherFp16Int4PerChannelColumnMajorFalse.cu │ │ │ ├── kernelDispatcherFp16Int4PerChannelColumnMajorInterleavedTrue.cu │ │ │ ├── kernelDispatcherFp16Int4PerChannelColumnMajorTrue.cu │ │ │ ├── kernelDispatcherFp16Int8PerChannelColumnMajorFalse.cu │ │ │ ├── kernelDispatcherFp16Int8PerChannelColumnMajorInterleavedTrue.cu │ │ │ ├── kernelDispatcherFp16Int8PerChannelColumnMajorTrue.cu │ │ │ ├── kernelLauncher.h │ │ │ ├── utility.h │ │ │ ├── weight_only_gemv_wrapper.cu │ │ │ └── weight_only_gemv_wrapper.h │ │ │ └── weight_scale │ │ │ ├── CMakeLists.txt │ │ │ ├── weight_scale_kernel.cu │ │ │ ├── weight_scale_kernel.h │ │ │ └── weight_scale_test.cu │ └── utils │ │ ├── CMakeLists.txt │ │ ├── ascend │ │ ├── CMakeLists.txt │ │ ├── atb_executor.cc │ │ ├── atb_executor.h │ │ ├── common.cc │ │ ├── common.h │ │ └── tiling_data_types.h │ │ ├── common.cc │ │ ├── common.h │ │ ├── nvidia │ │ ├── CMakeLists.txt │ │ ├── assert.cpp │ │ ├── assert.h │ │ ├── cuda_bf16_fallbacks.cuh │ │ ├── cuda_bf16_wrapper.h │ │ ├── cuda_fp8_utils.cu │ │ ├── cuda_fp8_utils.h │ │ ├── cuda_type_utils.cuh │ │ ├── cuda_utils.cu │ │ ├── cuda_utils.h │ │ ├── kllm_exception.cpp │ │ ├── kllm_exception.h │ │ ├── quantization.h │ │ ├── scalar_type.hpp │ │ ├── string_utils.cpp │ │ ├── string_utils.h │ │ ├── utils_test.cu │ │ └── workspace.h │ │ ├── quant_type.h │ │ └── zixiao │ │ └── CMakeLists.txt │ ├── tests │ ├── CMakeLists.txt │ ├── kernels │ │ ├── CMakeLists.txt │ │ ├── ascend │ │ │ └── utils │ │ │ │ └── testsuit_base.h │ │ ├── data │ │ │ └── sampler │ │ │ │ └── greedy │ │ │ │ ├── input_float.npy │ │ │ │ └── input_float_10x32000.npy │ │ └── nvidia │ │ │ ├── CMakeLists.txt │ │ │ └── utils │ │ │ └── testsuit_base.h │ └── references │ │ ├── CMakeLists.txt │ │ ├── argmax.h │ │ ├── deepseek_v3_grouped_topk.h │ │ ├── matmul.h │ │ ├── permute.h │ │ └── rms_layernorm.h │ └── tools │ ├── get_nvidia_gpu_properties.py │ ├── search_best_gemm_algo │ ├── CMakeLists.txt │ ├── README-cn.md │ ├── README.md │ ├── gemm_algo_config_generator.py │ ├── gemm_algo_map_demo.yaml │ ├── gemm_problem_space_template.csv │ ├── load_yaml_config.py │ └── search_best_gemm_algo.cc │ └── touch_host_stub.sh ├── CMakeLists.txt ├── LICENSE ├── README.md ├── README_cn.md ├── benchmarks ├── README.md ├── bench_one_batch.sh ├── benchmark_input.csv ├── benchmark_throughput.py ├── check_diff.py ├── longbench_reader.py ├── prefix_cache_config.yaml ├── prefix_cache_reader.py └── share_gpt_500.csv ├── cmake ├── FindCUDNN.cmake ├── FindNCCL.cmake ├── FlashAttention3AutoBuild.cmake ├── LLM_kernels.cmake ├── ascend.cmake ├── base.cmake ├── external │ ├── abseil-cpp.cmake │ ├── base64.cmake │ ├── boost.cmake │ ├── cppzmq.cmake │ ├── fmt.cmake │ ├── gflags.cmake │ ├── gtest.cmake │ ├── httplib.cmake │ ├── libzmq.cmake │ ├── loguru.cmake │ ├── msgpack.cmake │ ├── nlohmann_json.cmake │ ├── opentelemetry.cmake │ ├── polaris_cpp.cmake │ ├── protobuf.cmake │ ├── pybind11.cmake │ ├── re2.cmake │ ├── tbb.cmake │ ├── xgrammar.cmake │ └── yaml-cpp.cmake ├── internal.cmake ├── module │ ├── CMakeCCECompiler.cmake.in │ ├── CMakeCCEFunction.cmake │ ├── CMakeCCEInformation.cmake │ ├── CMakeDetermineCCECompiler.cmake │ └── CMakeTestCCECompiler.cmake ├── nvidia.cmake └── zixiao.cmake ├── docker ├── Dockerfile.gpu ├── Dockerfile.npu └── Dockerfile.tencentos4.gpu ├── docs ├── img │ └── webchat-github.jpg ├── monitoring_metrics_guide.md └── technology │ └── design │ ├── kvcache-relationship-between-ascend-atb-and-ksana-cn.md │ └── kvcache-relationship-between-ascend-atb-and-ksana.md ├── examples ├── CMakeLists.txt ├── api_client.py ├── api_demo.py ├── deepseek_fp8_perf.yaml ├── deepseek_int4_perf.yaml ├── deepseekv2 │ ├── config.json │ └── ksana_llm_deepseek_v2_tp2_dp2.yaml ├── ksana_llm.yaml ├── ksana_llm2-7b.yaml ├── ksana_llm_ascend_llama13b.yaml ├── ksana_llm_ascend_llama13b_multi_npu.yaml ├── ksana_llm_deepseekv2.yaml ├── ksana_llm_disaggregating_pd.yaml ├── ksana_llm_distributed.yaml ├── ksana_llm_ixc.yaml ├── ksana_llm_llama2_13b.yaml ├── ksana_llm_qwen2.yaml ├── ksana_llm_qwen3_32b.yaml ├── ksana_llm_qwenvl.yaml ├── llama7b │ ├── ksana_llm.yaml │ ├── ksana_llm_ascend.yaml │ ├── ksana_llm_blocked_prefill.yaml │ ├── ksana_llm_dp.yaml │ ├── ksana_llm_tp.yaml │ └── serving_client.py └── streaming_client.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── src ├── CMakeLists.txt └── ksana_llm │ ├── CMakeLists.txt │ ├── batch_manager │ ├── CMakeLists.txt │ ├── batch_manager.cpp │ ├── batch_manager.h │ ├── batch_manager_test.cpp │ ├── schedule_processor.cpp │ ├── schedule_processor.h │ ├── schedule_processor_interface.h │ └── schedule_processor_test.cpp │ ├── batch_scheduler │ ├── CMakeLists.txt │ ├── batch_scheduler.cpp │ ├── batch_scheduler.h │ ├── batch_scheduler_balance_reqs_algo.cpp │ ├── batch_scheduler_balance_reqs_algo.h │ ├── batch_scheduler_balance_reqs_algo_test.cpp │ ├── batch_scheduler_interface.h │ ├── batch_scheduler_test.cpp │ ├── batch_scheduler_test.h │ ├── batch_scheduler_test_client.h │ ├── batch_scheduler_test_helper.h │ ├── batch_scheduler_test_helper_test.cpp │ ├── state │ │ ├── batch_state.h │ │ ├── scheduler_shared_counter.h │ │ └── scheduler_tick_tok.h │ ├── strategy │ │ ├── base_strategy.cpp │ │ ├── base_strategy.h │ │ ├── continuous_batching.cpp │ │ ├── continuous_batching.h │ │ ├── continuous_batching_process_transfer_queue.cpp │ │ ├── continuous_batching_test.cpp │ │ ├── strategy_factory.cpp │ │ └── strategy_factory.h │ └── workload_balance │ │ ├── pp_multibatch_balancer.cpp │ │ ├── pp_multibatch_balancer.h │ │ └── pp_multibatch_balancer_test.cpp │ ├── cache_manager │ ├── CMakeLists.txt │ ├── base_cache_manager.h │ ├── block_allocator │ │ ├── block_allocator.cpp │ │ ├── block_allocator.h │ │ ├── block_allocator_interface.h │ │ ├── block_allocator_manager.cpp │ │ ├── block_allocator_manager.h │ │ └── block_allocator_test.cpp │ ├── cache_manager_factory.cpp │ ├── cache_manager_factory.h │ ├── cache_manager_interface.h │ ├── direct_cache_manager.cpp │ ├── direct_cache_manager.h │ ├── prefix_cache_manager.cpp │ ├── prefix_cache_manager.h │ ├── prefix_cache_manager_test.cpp │ └── prefix_cache_manager_test_helper.h │ ├── connector │ ├── CMakeLists.txt │ ├── communicator │ │ ├── CMakeLists.txt │ │ ├── communicator.h │ │ ├── communicator_manager.cpp │ │ ├── communicator_manager.h │ │ ├── communicator_manager_test.cpp │ │ ├── nvida │ │ │ ├── nccl_communicator.cpp │ │ │ ├── nccl_communicator.h │ │ │ └── nccl_communicator_test.cpp │ │ └── zmq │ │ │ ├── zmq_communicator.cpp │ │ │ ├── zmq_communicator.h │ │ │ └── zmq_communicator_test.cpp │ ├── config.h │ ├── connector.cpp │ ├── connector.h │ ├── connector_test.cpp │ ├── coordinator │ │ ├── CMakeLists.txt │ │ ├── coordinator.h │ │ ├── default_coordinator.cpp │ │ ├── default_coordinator.h │ │ └── default_coordinator_test.cpp │ ├── device_collector.h │ ├── device_info_manager.cpp │ ├── device_info_manager.h │ ├── node_info.h │ ├── router_client │ │ ├── CMakeLists.txt │ │ ├── http_router_client.cpp │ │ ├── http_router_client.h │ │ ├── http_router_client_test.cpp │ │ ├── resolved_endpoint.h │ │ ├── resolved_endpoint_internal.cpp │ │ ├── resolved_endpoint_simple.cpp │ │ └── router_client.h │ ├── task_dispatcher.cpp │ ├── task_dispatcher.h │ ├── task_dispatcher_nccl_test.cpp │ ├── task_key.h │ ├── task_manager.cpp │ ├── task_manager.h │ └── task_manager_test.cpp │ ├── cpp │ ├── CMakeLists.txt │ ├── README.md │ └── serving_forward_client.cpp │ ├── data_hub │ ├── CMakeLists.txt │ ├── data_hub.cpp │ ├── data_hub.h │ ├── data_hub_test.cpp │ ├── expert_data_hub.cpp │ ├── expert_data_hub.h │ ├── expert_parallel_deepep_wrapper.cpp │ ├── expert_parallel_deepep_wrapper.h │ ├── expert_parallel_deepep_wrapper_test.cpp │ ├── hidden_unit_buffer.cpp │ ├── hidden_unit_buffer.h │ ├── hidden_unit_buffer_test.cpp │ ├── schedule_output.cpp │ ├── schedule_output.h │ └── schedule_output_test.cpp │ ├── distributed │ ├── CMakeLists.txt │ ├── control_channel.cpp │ ├── control_channel.h │ ├── control_channel_test.cpp │ ├── control_message.h │ ├── data_channel.cpp │ ├── data_channel.h │ ├── data_channel_factory.cpp │ ├── data_channel_factory.h │ ├── data_channel_interface.h │ ├── data_channel_test.cpp │ ├── distributed_coordinator.cpp │ ├── distributed_coordinator.h │ ├── distributed_coordinator_test.cpp │ ├── expert_parallel_control_channel.cpp │ ├── expert_parallel_control_channel.h │ ├── expert_parallel_control_channel_test.cpp │ ├── node_info.h │ ├── nvidia │ │ ├── nccl_data_channel.cpp │ │ ├── nccl_data_channel.h │ │ └── nccl_data_channel_test.cpp │ ├── packet_type.h │ ├── packet_util.cpp │ ├── packet_util.h │ ├── raw_packet.h │ ├── raw_socket.cpp │ ├── raw_socket.h │ └── raw_socket_test.cpp │ ├── endpoints │ ├── CMakeLists.txt │ ├── endpoint_factory.cpp │ ├── endpoint_factory.h │ ├── http │ │ ├── http_endpoint.cpp │ │ └── http_endpoint.h │ ├── local │ │ ├── local_endpoint.cpp │ │ └── local_endpoint.h │ ├── rpc │ │ └── rpc_endpoint.h │ ├── streaming │ │ ├── streaming_iterator.cpp │ │ └── streaming_iterator.h │ └── wrapper │ │ ├── triton │ │ ├── client_forward_demo.py │ │ ├── client_generate_demo.py │ │ └── config │ │ │ └── ksana_llm │ │ │ ├── 1 │ │ │ ├── model.py │ │ │ └── test_model.py │ │ │ └── config.pbtxt │ │ └── trpc │ │ ├── CMakeLists.txt │ │ ├── cmake │ │ └── external │ │ │ ├── trpc-cpp.cmake │ │ │ ├── trpc-cpp.patch │ │ │ └── trpc-robus.cmake │ │ ├── rpc_config │ │ └── trpc_ksana.yaml │ │ ├── trpc_endpoint.cpp │ │ ├── trpc_endpoint.h │ │ └── trpc_endpoint_test.cpp │ ├── helpers │ └── environment_test_helper.h │ ├── kernels │ ├── CMakeLists.txt │ ├── argmax.h │ ├── ascend │ │ ├── kernel_wrapper.cpp │ │ ├── kernel_wrapper.h │ │ ├── trans_layout.cpp │ │ └── trans_layout_test.cpp │ ├── cast.h │ ├── grouped_topk.h │ ├── nvidia │ │ ├── CMakeLists.txt │ │ ├── attention_kernel_wrapper.cpp │ │ ├── attention_kernel_wrapper.h │ │ ├── basic_kernel_wrapper.cpp │ │ ├── basic_kernel_wrapper.h │ │ ├── deepseek_deepgemm_wrapper.cpp │ │ ├── deepseek_deepgemm_wrapper.h │ │ ├── deepseek_deepgemm_wrapper_test.cpp │ │ ├── flash_attn_cpp_wrapper.cpp │ │ ├── flash_attn_cpp_wrapper.h │ │ ├── flash_attn_cpp_wrapper_test.cpp │ │ ├── fused_moe_gptq_awq_test.cpp │ │ ├── fused_moe_gptq_int4_fp8_test.cpp │ │ ├── fused_moe_test.cpp │ │ ├── grouped_topk_test.cpp │ │ ├── kernel_wrapper.h │ │ ├── kernel_wrapper_test.cpp │ │ ├── moe_kernel_wrapper.cpp │ │ ├── moe_kernel_wrapper.h │ │ ├── trans_layout.cpp │ │ ├── triton_wrapper.cpp │ │ └── triton_wrapper.h │ ├── permute.h │ ├── trans_layout.h │ └── zixiao │ │ ├── cast.cpp │ │ ├── kernel_wrapper.cpp │ │ ├── kernel_wrapper.h │ │ ├── permute.cpp │ │ └── trans_layout.cpp │ ├── layers │ ├── CMakeLists.txt │ ├── activation_layer.h │ ├── add_layer.h │ ├── add_mul_layer.h │ ├── add_norm_layer.h │ ├── all_reduce_residual_add_norm_layer.h │ ├── ascend │ │ ├── activation_layer.cpp │ │ ├── add_layer.cpp │ │ ├── add_mul_layer.cpp │ │ ├── add_norm_layer.cpp │ │ ├── all_reduce_residual_add_norm_layer.cpp │ │ ├── assemble_tokens_hidden_layer.cpp │ │ ├── attention_layer.cpp │ │ ├── batched_matmul_layer.cpp │ │ ├── cast_layer.cpp │ │ ├── concat_layer.cpp │ │ ├── emb_lookup_layer.cpp │ │ ├── flash_attention_layer.cpp │ │ ├── flash_mla_attention_layer.cpp │ │ ├── flash_sparse_mla_indexer_layer.cpp │ │ ├── greedy_sampler_layer.cpp │ │ ├── hccl_all_gather_layer.cpp │ │ ├── hccl_all_reduce_sum_layer.cpp │ │ ├── input_refit_layer.cpp │ │ ├── layernorm_layer.cpp │ │ ├── matmul_layer.cpp │ │ ├── mem_adjuster_layer.cpp │ │ ├── paged_attention_layer.cpp │ │ ├── paged_mla_attention_layer.cpp │ │ ├── paged_sparse_mla_indexer_layer.cpp │ │ ├── permute_layer.cpp │ │ ├── silu_mul_layer.cpp │ │ └── split_layer.cpp │ ├── assemble_tokens_hidden_layer.h │ ├── attention_layer.h │ ├── base_layer.h │ ├── batched_matmul_layer.h │ ├── blockwise_matmul_layer.h │ ├── cast_layer.h │ ├── concat_layer.h │ ├── cpu │ │ └── emb_lookup_layer.cpp │ ├── custom_all_reduce_sum_layer.h │ ├── cutlass_matmul_layer.h │ ├── cutlass_moe_layer.h │ ├── eccl_all_gather_layer.h │ ├── eccl_all_reduce_sum_layer.h │ ├── emb_lookup_layer.h │ ├── flash_attention_layer.h │ ├── flash_mla_attention_layer.h │ ├── flash_sparse_mla_indexer_layer.h │ ├── fp8_matmul_layer.h │ ├── fp8_moe_layer.h │ ├── greedy_sampler_layer.h │ ├── grouped_topk_layer.h │ ├── hccl_all_gather_layer.h │ ├── hccl_all_reduce_sum_layer.h │ ├── input_refit_layer.h │ ├── layer_test.cpp │ ├── layer_workspace_manager.h │ ├── layernorm_layer.h │ ├── machete_matmul_layer.h │ ├── marlin_matmul_layer.h │ ├── marlin_moe_layer.h │ ├── matmul_layer.h │ ├── matmul_layer_factory.cpp │ ├── matmul_layer_factory.h │ ├── mem_adjuster_layer.h │ ├── moe_layer.h │ ├── moe_layer_factory.cpp │ ├── moe_layer_factory.h │ ├── mul_layer.h │ ├── nccl_all_gather_layer.h │ ├── nccl_all_reduce_sum_layer.h │ ├── nvidia │ │ ├── activation_layer.cpp │ │ ├── add_layer.cpp │ │ ├── add_mul_layer.cpp │ │ ├── add_norm_layer.cpp │ │ ├── all_reduce_residual_add_norm_layer.cpp │ │ ├── assemble_tokens_hidden_layer.cpp │ │ ├── attention_layer.cpp │ │ ├── batched_matmul_layer.cpp │ │ ├── blockwise_matmul_layer.cpp │ │ ├── cast_layer.cpp │ │ ├── concat_layer.cpp │ │ ├── custom_all_reduce_sum_layer.cpp │ │ ├── cutlass_matmul_layer.cpp │ │ ├── cutlass_moe_layer.cpp │ │ ├── emb_lookup_layer.cpp │ │ ├── flash_attention_layer.cpp │ │ ├── flash_mla_attention_layer.cpp │ │ ├── flash_sparse_mla_indexer_layer.cpp │ │ ├── fp8_matmul_layer.cpp │ │ ├── fp8_moe_layer.cpp │ │ ├── greedy_sampler_layer.cpp │ │ ├── grouped_topk_layer.cpp │ │ ├── input_refit_layer.cpp │ │ ├── layernorm_layer.cpp │ │ ├── machete_matmul_layer.cpp │ │ ├── marlin_matmul_layer.cpp │ │ ├── marlin_moe_layer.cpp │ │ ├── matmul_layer.cpp │ │ ├── mem_adjuster_layer.cpp │ │ ├── moe_layer.cpp │ │ ├── mul_layer.cpp │ │ ├── nccl_all_gather_layer.cpp │ │ ├── nccl_all_reduce_sum_layer.cpp │ │ ├── paged_attention_layer.cpp │ │ ├── paged_mla_attention_layer.cpp │ │ ├── paged_sparse_mla_indexer_layer.cpp │ │ ├── permute_layer.cpp │ │ ├── set_torch_stream_layer.cpp │ │ ├── silu_mul_layer.cpp │ │ └── split_layer.cpp │ ├── paged_attention_layer.h │ ├── paged_mla_attention_layer.h │ ├── paged_sparse_mla_indexer_layer.h │ ├── permute_layer.h │ ├── set_torch_stream_layer.h │ ├── silu_mul_layer.h │ ├── split_layer.h │ └── zixiao │ │ ├── activation_layer.cpp │ │ ├── add_layer.cpp │ │ ├── add_norm_layer.cpp │ │ ├── all_reduce_residual_add_norm_layer.cpp │ │ ├── assemble_tokens_hiddens_layer.cpp │ │ ├── attention_layer.cpp │ │ ├── cast_layer.cpp │ │ ├── eccl_all_gather_layer.cpp │ │ ├── eccl_all_reduce_sum_layer.cpp │ │ ├── emb_lookup_layer.cpp │ │ ├── flash_attention_layer.cpp │ │ ├── input_refit_layer.cpp │ │ ├── layernorm_layer.cpp │ │ ├── matmul_layer.cpp │ │ ├── paged_attention_layer.cpp │ │ └── silu_mul_layer.cpp │ ├── model_loader │ ├── CMakeLists.txt │ ├── check_deepseek_weight_tensor.py │ ├── check_gguf_tensor.py │ ├── check_llama_weight_tensor.py │ ├── check_pytorch_bin_tensor.py │ ├── check_pytorch_safe_tensor.py │ ├── config_parser │ │ ├── gguf_config_parser.cpp │ │ ├── gguf_config_parser.h │ │ ├── model_config_parser.cpp │ │ ├── model_config_parser.h │ │ ├── model_config_parser_factory.cpp │ │ ├── model_config_parser_factory.h │ │ ├── pytorch_config_parser.cpp │ │ └── pytorch_config_parser.h │ ├── file_loader │ │ ├── base_file_loader.h │ │ ├── gguf_file_loader.cpp │ │ ├── gguf_file_loader.h │ │ ├── model_file_loader.h │ │ ├── model_file_loder.cpp │ │ ├── pytorch_bin_file_loader.cpp │ │ ├── pytorch_bin_file_loader.h │ │ ├── pytorch_safetensor_file_loader.cpp │ │ └── pytorch_safetensor_file_loader.h │ ├── llama_model_loader_test.cpp │ ├── model_loader_test.cpp │ ├── model_loader_utils.cpp │ ├── model_loader_utils.h │ ├── new_deepseek_v3_model_loader_test.cpp │ └── weight_loader │ │ ├── model_weight_loader.cpp │ │ ├── model_weight_loader.h │ │ ├── model_weight_loader_factory.cpp │ │ └── model_weight_loader_factory.h │ ├── model_performance │ ├── CMakeLists.txt │ ├── communication_performance_runner.cpp │ ├── communication_performance_runner.h │ ├── communication_performance_runner_test.cpp │ ├── model_performance_runner.cpp │ ├── model_performance_runner.h │ ├── model_performance_runner_test.cpp │ ├── perf_profile_config_builder_for_json.cpp │ ├── perf_profile_config_builder_for_json.h │ ├── run_communication_performance.cpp │ ├── run_model_performance.cpp │ ├── run_model_performance_test.cpp │ └── test_config.json │ ├── models │ ├── CMakeLists.txt │ ├── baichuan │ │ ├── baichuan_model.cpp │ │ ├── baichuan_model.h │ │ ├── baichuan_weight.cpp │ │ └── baichuan_weight.h │ ├── base │ │ ├── CMakeLists.txt │ │ ├── base_model.cpp │ │ ├── base_model.h │ │ ├── base_model_config.cpp │ │ ├── base_model_config.h │ │ ├── base_model_config_parser.cpp │ │ ├── base_model_config_parser.h │ │ ├── base_model_weight_loader.cpp │ │ ├── base_model_weight_loader.h │ │ ├── base_weight.cpp │ │ ├── base_weight.h │ │ ├── buffer_manager.cpp │ │ ├── buffer_manager.h │ │ ├── common_model_weight_loader.cpp │ │ ├── common_model_weight_loader.h │ │ ├── fake_weight_for_test.h │ │ ├── fake_weight_test.cpp │ │ ├── forwarding_context.cpp │ │ ├── forwarding_context.h │ │ ├── layer_creation_context.cpp │ │ ├── layer_creation_context.h │ │ ├── model_arch.cpp │ │ ├── model_arch.h │ │ ├── model_communicator.cpp │ │ ├── model_communicator.h │ │ ├── model_format.h │ │ ├── model_input.cpp │ │ ├── model_input.h │ │ ├── model_input_test.cpp │ │ ├── model_output.cpp │ │ ├── model_output.h │ │ ├── model_weight.cpp │ │ └── model_weight.h │ ├── bge_reranker_minicpm │ │ ├── bge_reranker_minicpm_config.h │ │ ├── bge_reranker_minicpm_model.cpp │ │ ├── bge_reranker_minicpm_model.h │ │ ├── bge_reranker_minicpm_weight.cpp │ │ └── bge_reranker_minicpm_weight.h │ ├── chatglm │ │ ├── chatglm_config.h │ │ ├── chatglm_model.cpp │ │ ├── chatglm_model.h │ │ ├── chatglm_weight.cpp │ │ └── chatglm_weight.h │ ├── common │ │ ├── common_config.h │ │ ├── common_model.cpp │ │ ├── common_model.h │ │ ├── common_weight.cpp │ │ ├── common_weight.h │ │ ├── model_interface.h │ │ ├── model_test_helper.h │ │ ├── simple_decoder_layer.cpp │ │ ├── simple_decoder_layer.h │ │ └── simple_decoder_layer_test.cpp │ ├── common_moe │ │ ├── common_moe_weight.cpp │ │ ├── common_moe_weight.h │ │ ├── common_moe_weight_test.cpp │ │ └── moe_config.h │ ├── communicator │ │ ├── tp_communicator.cpp │ │ └── tp_communicator.h │ ├── deepseek_v3 │ │ ├── deepseek_v3_config.h │ │ ├── deepseek_v3_dp_test.cpp │ │ ├── deepseek_v3_model.cpp │ │ ├── deepseek_v3_model.h │ │ └── deepseek_v3_model_test.cpp │ ├── gpt │ │ ├── gpt_config.h │ │ ├── gpt_model.cpp │ │ ├── gpt_model.h │ │ ├── gpt_weight.cpp │ │ └── gpt_weight.h │ ├── hunyuan_large │ │ ├── hunyuan_large_model.cpp │ │ ├── hunyuan_large_model.h │ │ ├── hunyuan_large_weight.cpp │ │ └── hunyuan_large_weight.h │ ├── hunyuan_turbo │ │ ├── hunyuan_turbo_model.cpp │ │ ├── hunyuan_turbo_model.h │ │ ├── hunyuan_turbo_test.cpp │ │ ├── hunyuan_turbo_weight.cpp │ │ └── hunyuan_turbo_weight.h │ ├── internlm2 │ │ ├── internlm2_weight.cpp │ │ ├── internlm2_weight.h │ │ ├── internlm_model.cpp │ │ └── internlm_model.h │ ├── internlmxcomposer2 │ │ ├── internlmxcomposer2_model.cpp │ │ └── internlmxcomposer2_model.h │ ├── llama │ │ ├── llama_model.cpp │ │ ├── llama_model.h │ │ ├── llama_model_config.cpp │ │ ├── llama_model_config.h │ │ ├── llama_model_config_parser.cpp │ │ ├── llama_model_config_parser.h │ │ ├── llama_model_weight_loader.cpp │ │ ├── llama_model_weight_loader.h │ │ ├── llama_test.cpp │ │ ├── llama_weight.cpp │ │ └── llama_weight.h │ ├── llama4 │ │ ├── llama4_model.cpp │ │ ├── llama4_model.h │ │ ├── llama4_weight.cpp │ │ └── llama4_weight.h │ ├── mixtral │ │ ├── mixtral_model.cpp │ │ ├── mixtral_model.h │ │ ├── mixtral_weight.cpp │ │ └── mixtral_weight.h │ ├── new_deepseek_v3 │ │ ├── new_deepseek_v3_config.cpp │ │ ├── new_deepseek_v3_config.h │ │ ├── new_deepseek_v3_config_parser.cpp │ │ ├── new_deepseek_v3_config_parser.h │ │ ├── new_deepseek_v3_weight_impl.cpp │ │ ├── new_deepseek_v3_weight_impl.h │ │ ├── new_deepseek_v3_weight_loader.cpp │ │ └── new_deepseek_v3_weight_loader.h │ ├── quant │ │ ├── cutlass_utils.cpp │ │ ├── cutlass_utils.h │ │ ├── cutlass_utils_test.cpp │ │ ├── machete_utils.cpp │ │ ├── machete_utils.h │ │ ├── marlin_utils.cpp │ │ ├── marlin_utils.h │ │ ├── marlin_utils_test.cpp │ │ ├── quant_fp8_weight_test.cpp │ │ ├── quant_int4_weight_test.cpp │ │ ├── quant_weight.cpp │ │ └── quant_weight.h │ ├── qwen │ │ ├── new_qwen_config.cpp │ │ ├── new_qwen_config.h │ │ ├── new_qwen_config_parser.cpp │ │ ├── new_qwen_config_parser.h │ │ ├── new_qwen_weight_loader.cpp │ │ ├── new_qwen_weight_loader.h │ │ ├── qwen_model.cpp │ │ ├── qwen_model.h │ │ ├── qwen_test.cpp │ │ ├── qwen_weight.cpp │ │ └── qwen_weight.h │ ├── qwen2_moe │ │ ├── qwen2_moe_model.cpp │ │ ├── qwen2_moe_model.h │ │ ├── qwen2_moe_weight.cpp │ │ └── qwen2_moe_weight.h │ └── qwen3_moe │ │ ├── qwen3_moe_model.cpp │ │ ├── qwen3_moe_model.h │ │ ├── qwen3_moe_weight.cpp │ │ └── qwen3_moe_weight.h │ ├── modules │ ├── CMakeLists.txt │ ├── attention │ │ ├── common_attention.cpp │ │ ├── common_attention.h │ │ ├── cross_layer_attention.cpp │ │ ├── cross_layer_attention.h │ │ ├── multihead_attention.cpp │ │ ├── multihead_attention.h │ │ ├── multihead_latent_attention.cpp │ │ ├── multihead_latent_attention.h │ │ ├── multihead_latent_attention_test.cpp │ │ ├── sparse_mla_indexer.cpp │ │ └── sparse_mla_indexer.h │ ├── basic │ │ ├── activation.cpp │ │ ├── activation.h │ │ ├── add.cpp │ │ ├── add.h │ │ ├── add_mul.cpp │ │ ├── add_mul.h │ │ ├── add_norm.cpp │ │ ├── add_norm.h │ │ ├── all_reduce_fused_norm_add.cpp │ │ ├── all_reduce_fused_norm_add.h │ │ ├── bmm.cpp │ │ ├── bmm.h │ │ ├── bmm_test.cpp │ │ ├── flash_attention.cpp │ │ ├── flash_attention.h │ │ ├── flash_mla_attention.cpp │ │ ├── flash_mla_attention.h │ │ ├── flash_sparse_mla_indexer.cpp │ │ ├── flash_sparse_mla_indexer.h │ │ ├── fuse_post_attention_add_norm.cpp │ │ ├── fuse_post_attention_add_norm.h │ │ ├── fuse_pre_attention_add_norm.cpp │ │ ├── fuse_pre_attention_add_norm.h │ │ ├── layernorm.cpp │ │ ├── layernorm.h │ │ ├── linear.cpp │ │ ├── linear.h │ │ ├── mem_adjuster.cpp │ │ ├── mem_adjuster.h │ │ ├── moe.cpp │ │ ├── moe.h │ │ ├── mul.cpp │ │ ├── mul.h │ │ ├── paged_attention.cpp │ │ ├── paged_attention.h │ │ ├── paged_mla_attention.cpp │ │ ├── paged_mla_attention.h │ │ ├── paged_sparse_mla_indexer.cpp │ │ ├── paged_sparse_mla_indexer.h │ │ ├── sigmoid.cpp │ │ ├── sigmoid.h │ │ ├── silu_mul.cpp │ │ ├── silu_mul.h │ │ ├── split.cpp │ │ └── split.h │ └── ffn │ │ ├── two_layered_ffn.cpp │ │ └── two_layered_ffn.h │ ├── multi_batch_controller │ ├── CMakeLists.txt │ ├── multi_batch_controller.cpp │ ├── multi_batch_controller.h │ └── multi_batch_controller_test.cpp │ ├── periphery │ ├── CMakeLists.txt │ ├── version_info.h.in │ ├── version_reporter.cpp │ ├── version_reporter.h │ └── version_reporter_test.cpp │ ├── plugins │ ├── CMakeLists.txt │ ├── base_plugin.cpp │ └── base_plugin.h │ ├── profiler │ ├── CMakeLists.txt │ ├── profile_event.cpp │ ├── profile_event.h │ ├── profiler.cpp │ ├── profiler.h │ ├── profiler_test.cpp │ ├── reporter.cpp │ ├── reporter.h │ ├── sched_event_tracer.cpp │ ├── sched_event_tracer.h │ ├── timer.cpp │ ├── timer.h │ ├── writer.cpp │ └── writer.h │ ├── python │ ├── ksana_llm │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── hf_transformers_model_config.py │ │ ├── ksana_engine.py │ │ ├── ksana_plugin.py │ │ └── processor_op_base.py │ ├── ksana_plugin │ │ ├── internlmxcomposer2 │ │ │ ├── ksana_plugin.py │ │ │ ├── ksana_plugin_model.py │ │ │ └── requirements.txt │ │ ├── internvl_chat │ │ │ ├── InternVL │ │ │ │ └── ksana_plugin_model.py │ │ │ ├── InternVL2_5 │ │ │ │ └── ksana_plugin_model.py │ │ │ ├── ksana_plugin.py │ │ │ └── requirements.txt │ │ ├── plugin_model.py │ │ ├── plugin_utils.py │ │ ├── qwen2_vl │ │ │ ├── ksana_plugin.py │ │ │ ├── ksana_plugin_model.py │ │ │ └── requirements.txt │ │ ├── qwen_vl │ │ │ ├── ksana_plugin.py │ │ │ ├── ksana_plugin_model.py │ │ │ └── requirements.txt │ │ └── trt_engine.py │ ├── kv_scale_files │ │ ├── README.md │ │ ├── README_cn.md │ │ ├── llama2-70b-fp8-kv │ │ │ └── kv_cache_scales.json │ │ ├── llama2-7b-fp8-kv │ │ │ └── kv_cache_scales.json │ │ ├── qwen2-0.5b-fp8-kv │ │ │ └── kv_cache_scales.json │ │ └── qwen2-1.5b-fp8-kv │ │ │ └── kv_cache_scales.json │ ├── openaiapi │ │ ├── __init__.py │ │ ├── chat_templates │ │ │ ├── tool_chat_template_deepseekr1.jinja │ │ │ └── tool_chat_template_deepseekv3.jinja │ │ ├── openai_adapter.py │ │ ├── openai_protocol.py │ │ ├── readme_cn.md │ │ ├── reasoning │ │ │ ├── __init__.py │ │ │ ├── abs_reasoning_parsers.py │ │ │ ├── deepseek_r1_reasoning_parser.py │ │ │ └── qwen3_reasoning_parser.py │ │ ├── request_converter.py │ │ ├── serving_chat.py │ │ ├── serving_completion.py │ │ ├── serving_embedding.py │ │ ├── serving_models.py │ │ ├── tool_parsers │ │ │ ├── __init__.py │ │ │ ├── abstract_tool_parser.py │ │ │ ├── deepseekv3_tool_parser.py │ │ │ ├── hermes_tool_parser.py │ │ │ ├── internlm2_tool_parser.py │ │ │ ├── kimi_k2_tool_parser.py │ │ │ ├── llama4_pythonic_tool_parser.py │ │ │ ├── llama_tool_parser.py │ │ │ ├── mistral_tool_parser.py │ │ │ ├── pythonic_tool_parser.py │ │ │ └── utils.py │ │ └── transformers_utils │ │ │ ├── chat_utils.py │ │ │ ├── processor.py │ │ │ ├── tokenizer_base.py │ │ │ └── tokenizers │ │ │ ├── __init__.py │ │ │ └── mistral.py │ ├── serving_forward_client.py │ ├── serving_generate_client.py │ ├── serving_server.py │ ├── serving_server_test.py │ ├── simple_router │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config.ini │ │ ├── config.py │ │ ├── database.py │ │ ├── generate.py │ │ ├── main.py │ │ ├── models.py │ │ ├── name_service │ │ │ ├── __init__.py │ │ │ ├── auto_provider.py │ │ │ ├── name_service.py │ │ │ └── polaris_provider.py │ │ ├── node.py │ │ ├── schemas.py │ │ ├── services.py │ │ └── tests │ │ │ └── __init__.py │ ├── tests │ │ ├── conftest.py │ │ ├── openai_tools │ │ │ ├── __init__.py │ │ │ └── test_kimi_k2_tool_parser.py │ │ ├── test_constraint.py │ │ ├── test_flexible_cache.py │ │ ├── test_ksana_engine.py │ │ ├── test_openai_api_client.py │ │ ├── test_processor_op_base.py │ │ ├── test_qwen_model.py │ │ ├── test_structured_output.py │ │ └── utils.py │ ├── utilize │ │ ├── __init__.py │ │ ├── logger.py │ │ └── utils.py │ └── weight_map │ │ ├── README.md │ │ ├── chatglm_weight_map.json │ │ ├── deepseek_v2_weight_map.json │ │ ├── deepseek_v3_weight_map.json │ │ ├── fairseq-transformer_weight_map.json │ │ ├── gpt2_weight_map.json │ │ ├── hunyuan_weight_map.json │ │ ├── internlm2_weight_map.json │ │ ├── internlmxcomposer2_weight_map.json │ │ ├── internvl_chat_weight_map.json │ │ ├── internvl_qwen2_weight_map.json │ │ ├── llama4_weight_map.json │ │ ├── llama_gguf_weight_map.json │ │ ├── mixtral_weight_map.json │ │ ├── openai-gpt_weight_map.json │ │ ├── qwen3_moe_weight_map.json │ │ ├── qwen3_weight_map.json │ │ └── qwen_weight_map.json │ ├── runtime │ ├── CMakeLists.txt │ ├── cuda_graph_runner.cpp │ ├── cuda_graph_runner.h │ ├── draft_generator │ │ ├── draft_generator_interface.h │ │ ├── draft_tokens.h │ │ ├── trie_generator.cpp │ │ ├── trie_generator.h │ │ └── trie_generator_test.cpp │ ├── forward_request.h │ ├── generation_controller.cpp │ ├── generation_controller.h │ ├── generation_controller_test.cpp │ ├── infer_request.cpp │ ├── infer_request.h │ ├── infer_stage.h │ ├── layer_progress_tracker.cpp │ ├── layer_progress_tracker.h │ ├── layer_progress_tracker_test.cpp │ ├── llm_runtime.cpp │ ├── llm_runtime.h │ ├── llm_runtime_test.cpp │ ├── model_instance.cpp │ ├── model_instance.h │ ├── request_state.h │ ├── sampling_request.h │ ├── structured_generation │ │ ├── reasoning_structured_generator.cpp │ │ ├── reasoning_structured_generator.h │ │ ├── structured_generator_factory.cpp │ │ ├── structured_generator_factory.h │ │ ├── structured_generator_interface.h │ │ └── xgrammar │ │ │ ├── xgrammar_structured_generator.cpp │ │ │ ├── xgrammar_structured_generator.h │ │ │ ├── xgrammar_structured_generator_creator.h │ │ │ └── xgrammar_structured_generator_test.cpp │ ├── threadpool.h │ ├── threadpool_test.cpp │ ├── weight_instance.cpp │ ├── weight_instance.h │ ├── weight_instance_inferface.h │ ├── weight_instance_test.cpp │ ├── worker.cpp │ └── worker.h │ ├── samplers │ ├── CMakeLists.txt │ ├── base │ │ ├── base_sampling.cpp │ │ └── base_sampling.h │ ├── sampler.cpp │ ├── sampler.h │ ├── sampler_test.cpp │ ├── topk │ │ ├── topk_sampling.cpp │ │ └── topk_sampling.h │ └── topp │ │ ├── topp_sampling.cpp │ │ └── topp_sampling.h │ ├── service │ ├── CMakeLists.txt │ ├── inference_engine.cpp │ ├── inference_engine.h │ ├── inference_engine_test.cpp │ ├── inference_server.cpp │ ├── inference_server.h │ ├── service_lifetime.cpp │ ├── service_lifetime.h │ └── service_lifetime_interface.h │ ├── torch_op │ ├── CMakeLists.txt │ ├── serving_op.cpp │ ├── serving_op.h │ └── serving_op_test.cpp │ ├── transfer │ ├── CMakeLists.txt │ ├── transfer_engine.cpp │ ├── transfer_engine.h │ ├── transfer_test.cpp │ └── transfer_types.h │ └── utils │ ├── CMakeLists.txt │ ├── ascend │ ├── acl_utils.cpp │ ├── acl_utils.h │ ├── ascend_context.cpp │ ├── ascend_context.h │ ├── ascend_device.cpp │ ├── ascend_device.h │ └── hccl_utils.h │ ├── attention_backend │ ├── attention_backend_manager.cpp │ ├── attention_backend_manager.h │ ├── attention_backend_manager_test.cpp │ ├── flash_attention_backend.cpp │ ├── flash_attention_backend.h │ └── flash_attention_backend_test.cpp │ ├── barrier.h │ ├── base_file_tensor_loader.h │ ├── blocking_queue.h │ ├── calc_intvec_hash.h │ ├── channel.h │ ├── channel_test.cpp │ ├── common_context.cpp │ ├── common_context.h │ ├── common_device.h │ ├── config │ ├── model_config_parser.cpp │ ├── model_config_parser.h │ ├── schedule_config_parser.cpp │ └── schedule_config_parser.h │ ├── context.h │ ├── context_test.cpp │ ├── device_types.cpp │ ├── device_types.h │ ├── device_utils.h │ ├── dynamic_memory_counter.cpp │ ├── dynamic_memory_counter.h │ ├── dynamic_memory_pool.cpp │ ├── dynamic_memory_pool.h │ ├── dynamic_memory_pool_test.cpp │ ├── environment.cpp │ ├── environment.h │ ├── environment_test.cpp │ ├── expert_parallel_utils.h │ ├── finite_state_machine.cpp │ ├── finite_state_machine.h │ ├── finite_state_machine_test.cpp │ ├── forward_test.cpp │ ├── get_custom_weight_name.cpp │ ├── get_custom_weight_name.h │ ├── get_custom_weight_name_test.cpp │ ├── gguf_file_tensor_loader.cpp │ ├── gguf_file_tensor_loader.h │ ├── gguf_file_tensor_loader_test.cpp │ ├── gguf_file_utils.cpp │ ├── gguf_file_utils.h │ ├── grammar_backend.cpp │ ├── grammar_backend.h │ ├── grammar_matcher.cpp │ ├── grammar_matcher.h │ ├── grammar_test.cpp │ ├── id_generator.h │ ├── json_config_utils.cpp │ ├── json_config_utils.h │ ├── logger.cpp │ ├── logger.h │ ├── logger_test.cpp │ ├── memory_allocator.cpp │ ├── memory_allocator.h │ ├── memory_allocator_interface.h │ ├── memory_utils.cpp │ ├── memory_utils.h │ ├── memory_utils_test.cpp │ ├── nvidia │ ├── cuda_utils.cpp │ ├── cuda_utils.h │ ├── deepseek_deepgemm_bridge.cu │ ├── deepseek_deepgemm_bridge.h │ ├── grammar_backend_nvidia.cpp │ ├── grammar_backend_nvidia.h │ ├── grammar_matcher_nvidia.cpp │ ├── grammar_matcher_nvidia.h │ ├── nccl_utils.cpp │ ├── nccl_utils.h │ ├── nvidia_context.cpp │ ├── nvidia_context.h │ ├── nvidia_device.cpp │ └── nvidia_device.h │ ├── optional_file.h │ ├── pinned_mem_buffer_pool.h │ ├── pinned_mem_buffer_pool_test.cpp │ ├── pytorch_file_tensor_loader.cpp │ ├── pytorch_file_tensor_loader.h │ ├── pytorch_file_tensor_loader_test.cpp │ ├── quantization.h │ ├── reasoning_config.cpp │ ├── reasoning_config.h │ ├── reasoning_config_test.cpp │ ├── request.cpp │ ├── request.h │ ├── request_packer.cpp │ ├── request_packer.h │ ├── request_packer_test.cpp │ ├── request_serial.h │ ├── request_test.cpp │ ├── ret_code.h │ ├── runtime_dll_manager │ ├── runtime_dll_manager.cpp │ ├── runtime_dll_manager.h │ └── runtime_dll_manager_test.cpp │ ├── safetensors_file_saver.cpp │ ├── safetensors_file_saver.h │ ├── safetensors_file_tensor_loader.cpp │ ├── safetensors_file_tensor_loader.h │ ├── safetensors_file_tensor_loader_test.cpp │ ├── safetensors_file_tensor_loader_test_helper.h │ ├── schedule_output_process.cpp │ ├── schedule_output_process.h │ ├── search_path.cpp │ ├── search_path.h │ ├── search_status.h │ ├── service_utils.cpp │ ├── service_utils.h │ ├── singleton.h │ ├── singleton_test.cpp │ ├── socket_util.cpp │ ├── socket_util.h │ ├── socket_util_test.cpp │ ├── status.cpp │ ├── status.h │ ├── stop_checker.cpp │ ├── stop_checker.h │ ├── stop_checker_test.cpp │ ├── string_utils.h │ ├── tensor.cpp │ ├── tensor.h │ ├── tensor_manager.h │ ├── tensor_test.cpp │ ├── tensor_test_helper.h │ ├── tokenizer.cpp │ ├── tokenizer.h │ ├── tokenizer_test.cpp │ ├── utils.h │ ├── utils_test.cpp │ ├── waiter.cpp │ ├── waiter.h │ ├── waiter_test.cpp │ ├── yaml_reader.cpp │ ├── yaml_reader.h │ └── zixiao │ ├── tops_utils.cpp │ ├── tops_utils.h │ ├── zixiao_context.cpp │ ├── zixiao_context.h │ ├── zixiao_device.cpp │ └── zixiao_device.h ├── tests ├── integration_test.py ├── test.cpp ├── test.h ├── tiny_model_configs │ ├── bge_reranker_model_config.json │ ├── gpt_model_config.json │ ├── internlmxcomposer_model_config.json │ ├── moe_model_config.json │ ├── simple_model_config.json │ └── vl_model_config.json └── triton_wrapper_test.py └── tools ├── CMakeLists.txt ├── deepep_wrapper ├── CMakeLists.txt ├── README.md └── src │ ├── CMakeLists.txt │ ├── common.cpp │ ├── common.h │ ├── deep_ep.cpp │ ├── deep_ep.h │ ├── main.cpp │ ├── process.cpp │ ├── wrapper.cpp │ └── wrapper.h ├── eplb ├── README.md ├── expert_activation_heatmap.py └── expert_parallel_get_map.py ├── generate_triton_kernel_cubin.sh ├── get_nvidia_gpu_properties.py └── profiler ├── README.md ├── README_cn.md └── sched_event_trace_process.py /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/.clang-format -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/.gitmodules -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/.pylintrc -------------------------------------------------------------------------------- /3rdparty/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/.clang-format -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/.gitignore -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/3rdparty/deepgemm/deepgemm.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/3rdparty/deepgemm/deepgemm.patch -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/3rdparty/ini_reader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/3rdparty/ini_reader.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/ascend.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/ascend.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/base.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/base.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/flashinfer.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/flashinfer.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/fmt.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/fmt.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/module/CMakeTestCCECompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CCE_COMPILER_WORKS 1 CACHE INTERNAL "") 2 | -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/nvidia.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/nvidia.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/test.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/test.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/cmake/yaml-cpp.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/cmake/yaml-cpp.cmake -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/kernels/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/kernels/nvidia/add/add.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/kernels/nvidia/add/add.cu -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/kernels/nvidia/add/add.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/kernels/nvidia/add/add.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/kernels/nvidia/cast/cast.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/kernels/nvidia/cast/cast.cu -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/kernels/nvidia/cast/cast.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/kernels/nvidia/cast/cast.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/ascend/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/ascend/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/ascend/atb_executor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/ascend/atb_executor.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/ascend/common.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/ascend/common.cc -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/ascend/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/ascend/common.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/common.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/common.cc -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/common.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/assert.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/assert.cpp -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/assert.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/assert.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/cuda_utils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/cuda_utils.cu -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/cuda_utils.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/quantization.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/quantization.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/string_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/string_utils.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/utils_test.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/utils_test.cu -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/nvidia/workspace.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/nvidia/workspace.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/quant_type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/quant_type.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/csrc/utils/zixiao/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/csrc/utils/zixiao/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/kernels/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/references/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/references/CMakeLists.txt -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/references/argmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/references/argmax.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/references/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/references/matmul.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/references/permute.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/references/permute.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tests/references/rms_layernorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tests/references/rms_layernorm.h -------------------------------------------------------------------------------- /3rdparty/LLM_kernels/tools/touch_host_stub.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/3rdparty/LLM_kernels/tools/touch_host_stub.sh -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/README.md -------------------------------------------------------------------------------- /README_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/README_cn.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/bench_one_batch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/bench_one_batch.sh -------------------------------------------------------------------------------- /benchmarks/benchmark_input.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/benchmark_input.csv -------------------------------------------------------------------------------- /benchmarks/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/benchmark_throughput.py -------------------------------------------------------------------------------- /benchmarks/check_diff.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/check_diff.py -------------------------------------------------------------------------------- /benchmarks/longbench_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/longbench_reader.py -------------------------------------------------------------------------------- /benchmarks/prefix_cache_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/prefix_cache_config.yaml -------------------------------------------------------------------------------- /benchmarks/prefix_cache_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/prefix_cache_reader.py -------------------------------------------------------------------------------- /benchmarks/share_gpt_500.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/benchmarks/share_gpt_500.csv -------------------------------------------------------------------------------- /cmake/FindCUDNN.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/FindCUDNN.cmake -------------------------------------------------------------------------------- /cmake/FindNCCL.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/FindNCCL.cmake -------------------------------------------------------------------------------- /cmake/FlashAttention3AutoBuild.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/FlashAttention3AutoBuild.cmake -------------------------------------------------------------------------------- /cmake/LLM_kernels.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/LLM_kernels.cmake -------------------------------------------------------------------------------- /cmake/ascend.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/ascend.cmake -------------------------------------------------------------------------------- /cmake/base.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/base.cmake -------------------------------------------------------------------------------- /cmake/external/abseil-cpp.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/abseil-cpp.cmake -------------------------------------------------------------------------------- /cmake/external/base64.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/base64.cmake -------------------------------------------------------------------------------- /cmake/external/boost.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/boost.cmake -------------------------------------------------------------------------------- /cmake/external/cppzmq.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/cppzmq.cmake -------------------------------------------------------------------------------- /cmake/external/fmt.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/fmt.cmake -------------------------------------------------------------------------------- /cmake/external/gflags.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/gflags.cmake -------------------------------------------------------------------------------- /cmake/external/gtest.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/gtest.cmake -------------------------------------------------------------------------------- /cmake/external/httplib.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/httplib.cmake -------------------------------------------------------------------------------- /cmake/external/libzmq.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/libzmq.cmake -------------------------------------------------------------------------------- /cmake/external/loguru.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/loguru.cmake -------------------------------------------------------------------------------- /cmake/external/msgpack.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/msgpack.cmake -------------------------------------------------------------------------------- /cmake/external/nlohmann_json.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/nlohmann_json.cmake -------------------------------------------------------------------------------- /cmake/external/opentelemetry.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/opentelemetry.cmake -------------------------------------------------------------------------------- /cmake/external/polaris_cpp.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/polaris_cpp.cmake -------------------------------------------------------------------------------- /cmake/external/protobuf.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/protobuf.cmake -------------------------------------------------------------------------------- /cmake/external/pybind11.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/pybind11.cmake -------------------------------------------------------------------------------- /cmake/external/re2.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/re2.cmake -------------------------------------------------------------------------------- /cmake/external/tbb.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/tbb.cmake -------------------------------------------------------------------------------- /cmake/external/xgrammar.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/xgrammar.cmake -------------------------------------------------------------------------------- /cmake/external/yaml-cpp.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/external/yaml-cpp.cmake -------------------------------------------------------------------------------- /cmake/internal.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/internal.cmake -------------------------------------------------------------------------------- /cmake/module/CMakeCCECompiler.cmake.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/module/CMakeCCECompiler.cmake.in -------------------------------------------------------------------------------- /cmake/module/CMakeCCEFunction.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/module/CMakeCCEFunction.cmake -------------------------------------------------------------------------------- /cmake/module/CMakeCCEInformation.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/module/CMakeCCEInformation.cmake -------------------------------------------------------------------------------- /cmake/module/CMakeDetermineCCECompiler.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/module/CMakeDetermineCCECompiler.cmake -------------------------------------------------------------------------------- /cmake/module/CMakeTestCCECompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CCE_COMPILER_WORKS 1 CACHE INTERNAL "") 2 | -------------------------------------------------------------------------------- /cmake/nvidia.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/nvidia.cmake -------------------------------------------------------------------------------- /cmake/zixiao.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/cmake/zixiao.cmake -------------------------------------------------------------------------------- /docker/Dockerfile.gpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/docker/Dockerfile.gpu -------------------------------------------------------------------------------- /docker/Dockerfile.npu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/docker/Dockerfile.npu -------------------------------------------------------------------------------- /docker/Dockerfile.tencentos4.gpu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/docker/Dockerfile.tencentos4.gpu -------------------------------------------------------------------------------- /docs/img/webchat-github.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/docs/img/webchat-github.jpg -------------------------------------------------------------------------------- /docs/monitoring_metrics_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/docs/monitoring_metrics_guide.md -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/api_client.py -------------------------------------------------------------------------------- /examples/api_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/api_demo.py -------------------------------------------------------------------------------- /examples/deepseek_fp8_perf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/deepseek_fp8_perf.yaml -------------------------------------------------------------------------------- /examples/deepseek_int4_perf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/deepseek_int4_perf.yaml -------------------------------------------------------------------------------- /examples/deepseekv2/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/deepseekv2/config.json -------------------------------------------------------------------------------- /examples/ksana_llm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm.yaml -------------------------------------------------------------------------------- /examples/ksana_llm2-7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm2-7b.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_ascend_llama13b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_ascend_llama13b.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_ascend_llama13b_multi_npu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_ascend_llama13b_multi_npu.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_deepseekv2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_deepseekv2.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_disaggregating_pd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_disaggregating_pd.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_distributed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_distributed.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_ixc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_ixc.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_llama2_13b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_llama2_13b.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_qwen2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_qwen2.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_qwen3_32b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_qwen3_32b.yaml -------------------------------------------------------------------------------- /examples/ksana_llm_qwenvl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/ksana_llm_qwenvl.yaml -------------------------------------------------------------------------------- /examples/llama7b/ksana_llm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/llama7b/ksana_llm.yaml -------------------------------------------------------------------------------- /examples/llama7b/ksana_llm_ascend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/llama7b/ksana_llm_ascend.yaml -------------------------------------------------------------------------------- /examples/llama7b/ksana_llm_blocked_prefill.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/llama7b/ksana_llm_blocked_prefill.yaml -------------------------------------------------------------------------------- /examples/llama7b/ksana_llm_dp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/llama7b/ksana_llm_dp.yaml -------------------------------------------------------------------------------- /examples/llama7b/ksana_llm_tp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/llama7b/ksana_llm_tp.yaml -------------------------------------------------------------------------------- /examples/llama7b/serving_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/llama7b/serving_client.py -------------------------------------------------------------------------------- /examples/streaming_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/examples/streaming_client.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build_ext] 2 | inplace=1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/setup.py -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/batch_manager/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_manager/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/batch_manager/batch_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_manager/batch_manager.cpp -------------------------------------------------------------------------------- /src/ksana_llm/batch_manager/batch_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_manager/batch_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/batch_manager/batch_manager_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_manager/batch_manager_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/batch_manager/schedule_processor.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_manager/schedule_processor.cpp -------------------------------------------------------------------------------- /src/ksana_llm/batch_manager/schedule_processor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_manager/schedule_processor.h -------------------------------------------------------------------------------- /src/ksana_llm/batch_scheduler/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_scheduler/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/batch_scheduler/batch_scheduler.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_scheduler/batch_scheduler.cpp -------------------------------------------------------------------------------- /src/ksana_llm/batch_scheduler/batch_scheduler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_scheduler/batch_scheduler.h -------------------------------------------------------------------------------- /src/ksana_llm/batch_scheduler/batch_scheduler_test.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_scheduler/batch_scheduler_test.h -------------------------------------------------------------------------------- /src/ksana_llm/batch_scheduler/state/batch_state.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/batch_scheduler/state/batch_state.h -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/base_cache_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/base_cache_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/cache_manager_factory.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/cache_manager_factory.cpp -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/cache_manager_factory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/cache_manager_factory.h -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/cache_manager_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/cache_manager_interface.h -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/direct_cache_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/direct_cache_manager.cpp -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/direct_cache_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/direct_cache_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/prefix_cache_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/prefix_cache_manager.cpp -------------------------------------------------------------------------------- /src/ksana_llm/cache_manager/prefix_cache_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cache_manager/prefix_cache_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/connector/communicator/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/communicator/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/connector/communicator/communicator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/communicator/communicator.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/config.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/connector.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/connector.cpp -------------------------------------------------------------------------------- /src/ksana_llm/connector/connector.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/connector.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/connector_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/connector_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/connector/coordinator/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/coordinator/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/connector/coordinator/coordinator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/coordinator/coordinator.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/device_collector.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/device_collector.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/device_info_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/device_info_manager.cpp -------------------------------------------------------------------------------- /src/ksana_llm/connector/device_info_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/device_info_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/node_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/node_info.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/router_client/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/router_client/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/connector/router_client/router_client.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/router_client/router_client.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_dispatcher.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_dispatcher.cpp -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_dispatcher.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_dispatcher.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_dispatcher_nccl_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_dispatcher_nccl_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_key.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_key.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_manager.cpp -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/connector/task_manager_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/connector/task_manager_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cpp/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/cpp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cpp/README.md -------------------------------------------------------------------------------- /src/ksana_llm/cpp/serving_forward_client.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/cpp/serving_forward_client.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/data_hub.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/data_hub.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/data_hub.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/data_hub.h -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/data_hub_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/data_hub_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/expert_data_hub.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/expert_data_hub.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/expert_data_hub.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/expert_data_hub.h -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/hidden_unit_buffer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/hidden_unit_buffer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/hidden_unit_buffer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/hidden_unit_buffer.h -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/hidden_unit_buffer_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/hidden_unit_buffer_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/schedule_output.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/schedule_output.cpp -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/schedule_output.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/schedule_output.h -------------------------------------------------------------------------------- /src/ksana_llm/data_hub/schedule_output_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/data_hub/schedule_output_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/distributed/control_channel.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/control_channel.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/control_channel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/control_channel.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/control_channel_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/control_channel_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/control_message.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/control_message.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/data_channel.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/data_channel.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/data_channel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/data_channel.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/data_channel_factory.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/data_channel_factory.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/data_channel_factory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/data_channel_factory.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/data_channel_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/data_channel_interface.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/data_channel_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/data_channel_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/distributed_coordinator.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/distributed_coordinator.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/distributed_coordinator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/distributed_coordinator.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/node_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/node_info.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/nvidia/nccl_data_channel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/nvidia/nccl_data_channel.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/packet_type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/packet_type.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/packet_util.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/packet_util.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/packet_util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/packet_util.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/raw_packet.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/raw_packet.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/raw_socket.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/raw_socket.cpp -------------------------------------------------------------------------------- /src/ksana_llm/distributed/raw_socket.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/raw_socket.h -------------------------------------------------------------------------------- /src/ksana_llm/distributed/raw_socket_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/distributed/raw_socket_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/endpoint_factory.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/endpoint_factory.cpp -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/endpoint_factory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/endpoint_factory.h -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/http/http_endpoint.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/http/http_endpoint.cpp -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/http/http_endpoint.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/http/http_endpoint.h -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/local/local_endpoint.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/local/local_endpoint.cpp -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/local/local_endpoint.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/local/local_endpoint.h -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/rpc/rpc_endpoint.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/rpc/rpc_endpoint.h -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/wrapper/trpc/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/wrapper/trpc/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/endpoints/wrapper/trpc/trpc_endpoint.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/endpoints/wrapper/trpc/trpc_endpoint.h -------------------------------------------------------------------------------- /src/ksana_llm/helpers/environment_test_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/helpers/environment_test_helper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/kernels/argmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/argmax.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/ascend/kernel_wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/ascend/kernel_wrapper.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/ascend/kernel_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/ascend/kernel_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/ascend/trans_layout.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/ascend/trans_layout.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/ascend/trans_layout_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/ascend/trans_layout_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/cast.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/cast.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/grouped_topk.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/grouped_topk.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/basic_kernel_wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/basic_kernel_wrapper.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/basic_kernel_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/basic_kernel_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/flash_attn_cpp_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/flash_attn_cpp_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/fused_moe_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/fused_moe_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/grouped_topk_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/grouped_topk_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/kernel_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/kernel_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/kernel_wrapper_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/kernel_wrapper_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/moe_kernel_wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/moe_kernel_wrapper.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/moe_kernel_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/moe_kernel_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/trans_layout.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/trans_layout.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/triton_wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/triton_wrapper.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/nvidia/triton_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/nvidia/triton_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/permute.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/permute.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/trans_layout.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/trans_layout.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/zixiao/cast.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/zixiao/cast.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/zixiao/kernel_wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/zixiao/kernel_wrapper.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/zixiao/kernel_wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/zixiao/kernel_wrapper.h -------------------------------------------------------------------------------- /src/ksana_llm/kernels/zixiao/permute.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/zixiao/permute.cpp -------------------------------------------------------------------------------- /src/ksana_llm/kernels/zixiao/trans_layout.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/kernels/zixiao/trans_layout.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/layers/activation_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/activation_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/add_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/add_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/add_mul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/add_mul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/add_norm_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/add_norm_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/activation_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/activation_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/add_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/add_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/add_mul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/add_mul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/add_norm_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/add_norm_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/batched_matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/batched_matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/cast_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/cast_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/concat_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/concat_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/emb_lookup_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/emb_lookup_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/flash_attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/flash_attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/greedy_sampler_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/greedy_sampler_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/hccl_all_gather_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/hccl_all_gather_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/input_refit_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/input_refit_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/layernorm_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/layernorm_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/mem_adjuster_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/mem_adjuster_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/paged_attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/paged_attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/permute_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/permute_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/silu_mul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/silu_mul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/ascend/split_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/ascend/split_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/assemble_tokens_hidden_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/assemble_tokens_hidden_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/attention_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/attention_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/base_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/base_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/batched_matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/batched_matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/blockwise_matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/blockwise_matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/cast_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/cast_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/concat_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/concat_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/cpu/emb_lookup_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/cpu/emb_lookup_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/custom_all_reduce_sum_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/custom_all_reduce_sum_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/cutlass_matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/cutlass_matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/cutlass_moe_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/cutlass_moe_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/eccl_all_gather_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/eccl_all_gather_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/eccl_all_reduce_sum_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/eccl_all_reduce_sum_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/emb_lookup_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/emb_lookup_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/flash_attention_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/flash_attention_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/flash_mla_attention_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/flash_mla_attention_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/flash_sparse_mla_indexer_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/flash_sparse_mla_indexer_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/fp8_matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/fp8_matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/fp8_moe_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/fp8_moe_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/greedy_sampler_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/greedy_sampler_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/grouped_topk_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/grouped_topk_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/hccl_all_gather_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/hccl_all_gather_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/hccl_all_reduce_sum_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/hccl_all_reduce_sum_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/input_refit_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/input_refit_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/layer_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/layer_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/layer_workspace_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/layer_workspace_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/layernorm_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/layernorm_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/machete_matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/machete_matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/marlin_matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/marlin_matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/marlin_moe_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/marlin_moe_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/matmul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/matmul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/matmul_layer_factory.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/matmul_layer_factory.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/matmul_layer_factory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/matmul_layer_factory.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/mem_adjuster_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/mem_adjuster_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/moe_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/moe_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/moe_layer_factory.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/moe_layer_factory.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/moe_layer_factory.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/moe_layer_factory.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/mul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/mul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/nccl_all_gather_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nccl_all_gather_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/nccl_all_reduce_sum_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nccl_all_reduce_sum_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/activation_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/activation_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/add_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/add_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/add_mul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/add_mul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/add_norm_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/add_norm_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/batched_matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/batched_matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/cast_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/cast_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/concat_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/concat_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/cutlass_matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/cutlass_matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/cutlass_moe_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/cutlass_moe_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/emb_lookup_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/emb_lookup_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/flash_attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/flash_attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/fp8_matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/fp8_matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/fp8_moe_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/fp8_moe_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/greedy_sampler_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/greedy_sampler_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/grouped_topk_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/grouped_topk_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/input_refit_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/input_refit_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/layernorm_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/layernorm_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/machete_matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/machete_matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/marlin_matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/marlin_matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/marlin_moe_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/marlin_moe_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/mem_adjuster_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/mem_adjuster_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/moe_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/moe_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/mul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/mul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/nccl_all_gather_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/nccl_all_gather_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/paged_attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/paged_attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/permute_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/permute_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/silu_mul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/silu_mul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/nvidia/split_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/nvidia/split_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/paged_attention_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/paged_attention_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/paged_mla_attention_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/paged_mla_attention_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/paged_sparse_mla_indexer_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/paged_sparse_mla_indexer_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/permute_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/permute_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/set_torch_stream_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/set_torch_stream_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/silu_mul_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/silu_mul_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/split_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/split_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/activation_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/activation_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/add_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/add_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/add_norm_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/add_norm_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/cast_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/cast_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/eccl_all_gather_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/eccl_all_gather_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/emb_lookup_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/emb_lookup_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/flash_attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/flash_attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/input_refit_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/input_refit_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/layernorm_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/layernorm_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/matmul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/matmul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/paged_attention_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/paged_attention_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/layers/zixiao/silu_mul_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/layers/zixiao/silu_mul_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/model_loader/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_loader/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/model_loader/check_gguf_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_loader/check_gguf_tensor.py -------------------------------------------------------------------------------- /src/ksana_llm/model_loader/model_loader_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_loader/model_loader_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/model_loader/model_loader_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_loader/model_loader_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/model_loader/model_loader_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_loader/model_loader_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/model_performance/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_performance/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/model_performance/test_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/model_performance/test_config.json -------------------------------------------------------------------------------- /src/ksana_llm/models/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/models/baichuan/baichuan_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/baichuan/baichuan_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/baichuan/baichuan_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/baichuan/baichuan_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/baichuan/baichuan_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/baichuan/baichuan_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/baichuan/baichuan_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/baichuan/baichuan_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_model_config.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_model_config.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_model_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_model_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_model_config_parser.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_model_config_parser.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_model_weight_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_model_weight_loader.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/base_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/base_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/buffer_manager.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/buffer_manager.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/buffer_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/buffer_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/fake_weight_for_test.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/fake_weight_for_test.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/fake_weight_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/fake_weight_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/forwarding_context.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/forwarding_context.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/forwarding_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/forwarding_context.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/layer_creation_context.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/layer_creation_context.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/layer_creation_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/layer_creation_context.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_arch.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_arch.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_arch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_arch.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_communicator.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_communicator.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_communicator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_communicator.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_format.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_format.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_input.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_input.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_input.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_input.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_input_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_input_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_output.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_output.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_output.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_output.h -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/base/model_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/base/model_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/chatglm/chatglm_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/chatglm/chatglm_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/chatglm/chatglm_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/chatglm/chatglm_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/chatglm/chatglm_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/chatglm/chatglm_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/chatglm/chatglm_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/chatglm/chatglm_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/chatglm/chatglm_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/chatglm/chatglm_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common/common_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/common_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common/common_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/common_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/common/common_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/common_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common/common_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/common_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/common/common_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/common_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common/model_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/model_interface.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common/model_test_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/model_test_helper.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common/simple_decoder_layer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/simple_decoder_layer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/common/simple_decoder_layer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common/simple_decoder_layer.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common_moe/common_moe_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common_moe/common_moe_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/common_moe/common_moe_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common_moe/common_moe_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/common_moe/moe_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/common_moe/moe_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/communicator/tp_communicator.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/communicator/tp_communicator.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/communicator/tp_communicator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/communicator/tp_communicator.h -------------------------------------------------------------------------------- /src/ksana_llm/models/deepseek_v3/deepseek_v3_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/deepseek_v3/deepseek_v3_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/deepseek_v3/deepseek_v3_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/deepseek_v3/deepseek_v3_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/gpt/gpt_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/gpt/gpt_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/gpt/gpt_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/gpt/gpt_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/gpt/gpt_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/gpt/gpt_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/gpt/gpt_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/gpt/gpt_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/gpt/gpt_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/gpt/gpt_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/internlm2/internlm2_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/internlm2/internlm2_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/internlm2/internlm2_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/internlm2/internlm2_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/internlm2/internlm_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/internlm2/internlm_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/internlm2/internlm_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/internlm2/internlm_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_model_config.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_model_config.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_model_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_model_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/llama/llama_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama/llama_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/llama4/llama4_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama4/llama4_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/llama4/llama4_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama4/llama4_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/llama4/llama4_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama4/llama4_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/llama4/llama4_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/llama4/llama4_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/mixtral/mixtral_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/mixtral/mixtral_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/mixtral/mixtral_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/mixtral/mixtral_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/mixtral/mixtral_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/mixtral/mixtral_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/mixtral/mixtral_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/mixtral/mixtral_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/cutlass_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/cutlass_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/cutlass_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/cutlass_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/cutlass_utils_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/cutlass_utils_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/machete_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/machete_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/machete_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/machete_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/marlin_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/marlin_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/marlin_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/marlin_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/marlin_utils_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/marlin_utils_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/quant_fp8_weight_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/quant_fp8_weight_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/quant_int4_weight_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/quant_int4_weight_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/quant_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/quant_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/quant/quant_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/quant/quant_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/new_qwen_config.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/new_qwen_config.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/new_qwen_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/new_qwen_config.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/new_qwen_config_parser.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/new_qwen_config_parser.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/new_qwen_config_parser.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/new_qwen_config_parser.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/new_qwen_weight_loader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/new_qwen_weight_loader.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/new_qwen_weight_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/new_qwen_weight_loader.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/qwen_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/qwen_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/qwen_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/qwen_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/qwen_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/qwen_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/qwen_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/qwen_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen/qwen_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen/qwen_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen2_moe/qwen2_moe_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen2_moe/qwen2_moe_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen2_moe/qwen2_moe_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen2_moe/qwen2_moe_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen2_moe/qwen2_moe_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen2_moe/qwen2_moe_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen2_moe/qwen2_moe_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen2_moe/qwen2_moe_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen3_moe/qwen3_moe_model.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen3_moe/qwen3_moe_model.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen3_moe/qwen3_moe_model.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen3_moe/qwen3_moe_model.h -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen3_moe/qwen3_moe_weight.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen3_moe/qwen3_moe_weight.cpp -------------------------------------------------------------------------------- /src/ksana_llm/models/qwen3_moe/qwen3_moe_weight.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/models/qwen3_moe/qwen3_moe_weight.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/modules/attention/common_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/attention/common_attention.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/attention/common_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/attention/common_attention.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/attention/multihead_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/attention/multihead_attention.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/attention/sparse_mla_indexer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/attention/sparse_mla_indexer.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/activation.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/activation.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/activation.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/add.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/add.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/add.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/add.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/add_mul.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/add_mul.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/add_mul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/add_mul.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/add_norm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/add_norm.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/add_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/add_norm.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/bmm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/bmm.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/bmm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/bmm.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/bmm_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/bmm_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/flash_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/flash_attention.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/flash_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/flash_attention.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/flash_mla_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/flash_mla_attention.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/flash_mla_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/flash_mla_attention.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/layernorm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/layernorm.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/layernorm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/layernorm.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/linear.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/linear.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/linear.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/linear.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/mem_adjuster.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/mem_adjuster.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/mem_adjuster.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/mem_adjuster.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/moe.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/moe.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/moe.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/moe.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/mul.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/mul.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/mul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/mul.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/paged_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/paged_attention.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/paged_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/paged_attention.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/paged_mla_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/paged_mla_attention.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/paged_mla_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/paged_mla_attention.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/sigmoid.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/sigmoid.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/sigmoid.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/sigmoid.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/silu_mul.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/silu_mul.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/silu_mul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/silu_mul.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/split.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/split.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/basic/split.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/basic/split.h -------------------------------------------------------------------------------- /src/ksana_llm/modules/ffn/two_layered_ffn.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/ffn/two_layered_ffn.cpp -------------------------------------------------------------------------------- /src/ksana_llm/modules/ffn/two_layered_ffn.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/modules/ffn/two_layered_ffn.h -------------------------------------------------------------------------------- /src/ksana_llm/multi_batch_controller/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/multi_batch_controller/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/periphery/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/periphery/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/periphery/version_info.h.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/periphery/version_info.h.in -------------------------------------------------------------------------------- /src/ksana_llm/periphery/version_reporter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/periphery/version_reporter.cpp -------------------------------------------------------------------------------- /src/ksana_llm/periphery/version_reporter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/periphery/version_reporter.h -------------------------------------------------------------------------------- /src/ksana_llm/periphery/version_reporter_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/periphery/version_reporter_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/plugins/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/plugins/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/plugins/base_plugin.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/plugins/base_plugin.cpp -------------------------------------------------------------------------------- /src/ksana_llm/plugins/base_plugin.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/plugins/base_plugin.h -------------------------------------------------------------------------------- /src/ksana_llm/profiler/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/profiler/profile_event.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/profile_event.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/profile_event.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/profile_event.h -------------------------------------------------------------------------------- /src/ksana_llm/profiler/profiler.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/profiler.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/profiler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/profiler.h -------------------------------------------------------------------------------- /src/ksana_llm/profiler/profiler_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/profiler_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/reporter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/reporter.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/reporter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/reporter.h -------------------------------------------------------------------------------- /src/ksana_llm/profiler/sched_event_tracer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/sched_event_tracer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/sched_event_tracer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/sched_event_tracer.h -------------------------------------------------------------------------------- /src/ksana_llm/profiler/timer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/timer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/timer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/timer.h -------------------------------------------------------------------------------- /src/ksana_llm/profiler/writer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/writer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/profiler/writer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/profiler/writer.h -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_llm/__init__.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_llm/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_llm/arg_utils.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_llm/ksana_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_llm/ksana_engine.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_llm/ksana_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_llm/ksana_plugin.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_llm/processor_op_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_llm/processor_op_base.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_plugin/plugin_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_plugin/plugin_model.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_plugin/plugin_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_plugin/plugin_utils.py -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_plugin/qwen_vl/requirements.txt: -------------------------------------------------------------------------------- 1 | onnx>=1.12.0 2 | tensorrt==8.6.1 3 | torchvision 4 | matplotlib 5 | tiktoken 6 | -------------------------------------------------------------------------------- /src/ksana_llm/python/ksana_plugin/trt_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/ksana_plugin/trt_engine.py -------------------------------------------------------------------------------- /src/ksana_llm/python/kv_scale_files/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/kv_scale_files/README.md -------------------------------------------------------------------------------- /src/ksana_llm/python/kv_scale_files/README_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/kv_scale_files/README_cn.md -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/openai_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/openai_adapter.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/openai_protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/openai_protocol.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/readme_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/readme_cn.md -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/reasoning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/reasoning/__init__.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/request_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/request_converter.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/serving_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/serving_chat.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/serving_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/serving_completion.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/serving_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/serving_embedding.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/serving_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/serving_models.py -------------------------------------------------------------------------------- /src/ksana_llm/python/openaiapi/tool_parsers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/openaiapi/tool_parsers/utils.py -------------------------------------------------------------------------------- /src/ksana_llm/python/serving_forward_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/serving_forward_client.py -------------------------------------------------------------------------------- /src/ksana_llm/python/serving_generate_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/serving_generate_client.py -------------------------------------------------------------------------------- /src/ksana_llm/python/serving_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/serving_server.py -------------------------------------------------------------------------------- /src/ksana_llm/python/serving_server_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/serving_server_test.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/README.md -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/__init__.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/config.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/config.ini -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/config.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/database.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/database.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/generate.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/main.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/models.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/name_service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/node.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/schemas.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/services.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/simple_router/services.py -------------------------------------------------------------------------------- /src/ksana_llm/python/simple_router/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/conftest.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/openai_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_constraint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_constraint.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_flexible_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_flexible_cache.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_ksana_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_ksana_engine.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_openai_api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_openai_api_client.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_processor_op_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_processor_op_base.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_qwen_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_qwen_model.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/test_structured_output.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/test_structured_output.py -------------------------------------------------------------------------------- /src/ksana_llm/python/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/tests/utils.py -------------------------------------------------------------------------------- /src/ksana_llm/python/utilize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/utilize/__init__.py -------------------------------------------------------------------------------- /src/ksana_llm/python/utilize/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/utilize/logger.py -------------------------------------------------------------------------------- /src/ksana_llm/python/utilize/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/utilize/utils.py -------------------------------------------------------------------------------- /src/ksana_llm/python/weight_map/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/weight_map/README.md -------------------------------------------------------------------------------- /src/ksana_llm/python/weight_map/gpt2_weight_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/weight_map/gpt2_weight_map.json -------------------------------------------------------------------------------- /src/ksana_llm/python/weight_map/qwen3_weight_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/weight_map/qwen3_weight_map.json -------------------------------------------------------------------------------- /src/ksana_llm/python/weight_map/qwen_weight_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/python/weight_map/qwen_weight_map.json -------------------------------------------------------------------------------- /src/ksana_llm/runtime/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/runtime/cuda_graph_runner.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/cuda_graph_runner.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/cuda_graph_runner.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/cuda_graph_runner.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/draft_generator/draft_tokens.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/draft_generator/draft_tokens.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/forward_request.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/forward_request.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/generation_controller.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/generation_controller.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/generation_controller.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/generation_controller.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/generation_controller_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/generation_controller_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/infer_request.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/infer_request.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/infer_request.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/infer_request.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/infer_stage.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/infer_stage.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/layer_progress_tracker.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/layer_progress_tracker.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/layer_progress_tracker.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/layer_progress_tracker.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/layer_progress_tracker_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/layer_progress_tracker_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/llm_runtime.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/llm_runtime.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/llm_runtime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/llm_runtime.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/llm_runtime_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/llm_runtime_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/model_instance.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/model_instance.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/model_instance.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/model_instance.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/request_state.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/request_state.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/sampling_request.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/sampling_request.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/threadpool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/threadpool.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/threadpool_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/threadpool_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/weight_instance.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/weight_instance.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/weight_instance.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/weight_instance.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/weight_instance_inferface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/weight_instance_inferface.h -------------------------------------------------------------------------------- /src/ksana_llm/runtime/weight_instance_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/weight_instance_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/worker.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/worker.cpp -------------------------------------------------------------------------------- /src/ksana_llm/runtime/worker.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/runtime/worker.h -------------------------------------------------------------------------------- /src/ksana_llm/samplers/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/samplers/base/base_sampling.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/base/base_sampling.cpp -------------------------------------------------------------------------------- /src/ksana_llm/samplers/base/base_sampling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/base/base_sampling.h -------------------------------------------------------------------------------- /src/ksana_llm/samplers/sampler.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/sampler.cpp -------------------------------------------------------------------------------- /src/ksana_llm/samplers/sampler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/sampler.h -------------------------------------------------------------------------------- /src/ksana_llm/samplers/sampler_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/sampler_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/samplers/topk/topk_sampling.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/topk/topk_sampling.cpp -------------------------------------------------------------------------------- /src/ksana_llm/samplers/topk/topk_sampling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/topk/topk_sampling.h -------------------------------------------------------------------------------- /src/ksana_llm/samplers/topp/topp_sampling.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/topp/topp_sampling.cpp -------------------------------------------------------------------------------- /src/ksana_llm/samplers/topp/topp_sampling.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/samplers/topp/topp_sampling.h -------------------------------------------------------------------------------- /src/ksana_llm/service/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/service/inference_engine.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/inference_engine.cpp -------------------------------------------------------------------------------- /src/ksana_llm/service/inference_engine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/inference_engine.h -------------------------------------------------------------------------------- /src/ksana_llm/service/inference_engine_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/inference_engine_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/service/inference_server.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/inference_server.cpp -------------------------------------------------------------------------------- /src/ksana_llm/service/inference_server.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/inference_server.h -------------------------------------------------------------------------------- /src/ksana_llm/service/service_lifetime.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/service_lifetime.cpp -------------------------------------------------------------------------------- /src/ksana_llm/service/service_lifetime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/service_lifetime.h -------------------------------------------------------------------------------- /src/ksana_llm/service/service_lifetime_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/service/service_lifetime_interface.h -------------------------------------------------------------------------------- /src/ksana_llm/torch_op/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/torch_op/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/torch_op/serving_op.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/torch_op/serving_op.cpp -------------------------------------------------------------------------------- /src/ksana_llm/torch_op/serving_op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/torch_op/serving_op.h -------------------------------------------------------------------------------- /src/ksana_llm/torch_op/serving_op_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/torch_op/serving_op_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/transfer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/transfer/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/transfer/transfer_engine.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/transfer/transfer_engine.cpp -------------------------------------------------------------------------------- /src/ksana_llm/transfer/transfer_engine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/transfer/transfer_engine.h -------------------------------------------------------------------------------- /src/ksana_llm/transfer/transfer_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/transfer/transfer_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/transfer/transfer_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/transfer/transfer_types.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/CMakeLists.txt -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/acl_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/acl_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/acl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/acl_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/ascend_context.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/ascend_context.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/ascend_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/ascend_context.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/ascend_device.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/ascend_device.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/ascend_device.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/ascend_device.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/ascend/hccl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ascend/hccl_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/barrier.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/barrier.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/base_file_tensor_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/base_file_tensor_loader.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/blocking_queue.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/blocking_queue.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/calc_intvec_hash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/calc_intvec_hash.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/channel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/channel.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/channel_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/channel_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/common_context.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/common_context.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/common_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/common_context.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/common_device.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/common_device.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/config/model_config_parser.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/config/model_config_parser.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/config/model_config_parser.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/config/model_config_parser.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/config/schedule_config_parser.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/config/schedule_config_parser.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/context.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/context_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/context_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/device_types.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/device_types.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/device_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/device_types.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/device_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/device_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/dynamic_memory_counter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/dynamic_memory_counter.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/dynamic_memory_counter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/dynamic_memory_counter.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/dynamic_memory_pool.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/dynamic_memory_pool.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/dynamic_memory_pool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/dynamic_memory_pool.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/dynamic_memory_pool_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/dynamic_memory_pool_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/environment.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/environment.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/environment.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/environment.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/environment_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/environment_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/expert_parallel_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/expert_parallel_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/finite_state_machine.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/finite_state_machine.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/finite_state_machine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/finite_state_machine.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/finite_state_machine_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/finite_state_machine_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/forward_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/forward_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/get_custom_weight_name.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/get_custom_weight_name.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/get_custom_weight_name.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/get_custom_weight_name.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/gguf_file_tensor_loader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/gguf_file_tensor_loader.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/gguf_file_tensor_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/gguf_file_tensor_loader.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/gguf_file_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/gguf_file_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/gguf_file_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/gguf_file_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/grammar_backend.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/grammar_backend.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/grammar_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/grammar_backend.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/grammar_matcher.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/grammar_matcher.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/grammar_matcher.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/grammar_matcher.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/grammar_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/grammar_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/id_generator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/id_generator.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/json_config_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/json_config_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/json_config_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/json_config_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/logger.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/logger.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/logger.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/logger.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/logger_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/logger_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/memory_allocator.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/memory_allocator.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/memory_allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/memory_allocator.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/memory_allocator_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/memory_allocator_interface.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/memory_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/memory_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/memory_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/memory_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/memory_utils_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/memory_utils_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/cuda_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/cuda_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/cuda_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/cuda_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/nccl_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/nccl_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/nccl_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/nccl_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/nvidia_context.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/nvidia_context.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/nvidia_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/nvidia_context.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/nvidia_device.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/nvidia_device.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/nvidia/nvidia_device.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/nvidia/nvidia_device.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/optional_file.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/optional_file.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/pinned_mem_buffer_pool.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/pinned_mem_buffer_pool.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/pytorch_file_tensor_loader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/pytorch_file_tensor_loader.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/pytorch_file_tensor_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/pytorch_file_tensor_loader.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/quantization.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/quantization.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/reasoning_config.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/reasoning_config.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/reasoning_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/reasoning_config.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/reasoning_config_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/reasoning_config_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/request.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/request.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/request_packer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request_packer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/request_packer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request_packer.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/request_packer_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request_packer_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/request_serial.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request_serial.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/request_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/request_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/ret_code.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/ret_code.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/safetensors_file_saver.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/safetensors_file_saver.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/safetensors_file_saver.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/safetensors_file_saver.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/schedule_output_process.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/schedule_output_process.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/schedule_output_process.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/schedule_output_process.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/search_path.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/search_path.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/search_path.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/search_path.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/search_status.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/search_status.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/service_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/service_utils.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/service_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/service_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/singleton.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/singleton.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/singleton_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/singleton_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/socket_util.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/socket_util.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/socket_util.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/socket_util.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/socket_util_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/socket_util_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/status.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/status.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/status.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/status.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/stop_checker.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/stop_checker.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/stop_checker.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/stop_checker.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/stop_checker_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/stop_checker_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/string_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/string_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/tensor.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tensor.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tensor.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/tensor_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tensor_manager.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/tensor_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tensor_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/tensor_test_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tensor_test_helper.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/tokenizer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tokenizer.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/tokenizer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tokenizer.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/tokenizer_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/tokenizer_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/utils_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/utils_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/waiter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/waiter.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/waiter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/waiter.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/waiter_test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/waiter_test.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/yaml_reader.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/yaml_reader.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/yaml_reader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/yaml_reader.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/zixiao/tops_utils.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ksana_llm/utils/zixiao/tops_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/zixiao/tops_utils.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/zixiao/zixiao_context.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/zixiao/zixiao_context.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/zixiao/zixiao_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/zixiao/zixiao_context.h -------------------------------------------------------------------------------- /src/ksana_llm/utils/zixiao/zixiao_device.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/zixiao/zixiao_device.cpp -------------------------------------------------------------------------------- /src/ksana_llm/utils/zixiao/zixiao_device.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/src/ksana_llm/utils/zixiao/zixiao_device.h -------------------------------------------------------------------------------- /tests/integration_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/integration_test.py -------------------------------------------------------------------------------- /tests/test.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/test.cpp -------------------------------------------------------------------------------- /tests/test.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/test.h -------------------------------------------------------------------------------- /tests/tiny_model_configs/gpt_model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/tiny_model_configs/gpt_model_config.json -------------------------------------------------------------------------------- /tests/tiny_model_configs/moe_model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/tiny_model_configs/moe_model_config.json -------------------------------------------------------------------------------- /tests/tiny_model_configs/simple_model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/tiny_model_configs/simple_model_config.json -------------------------------------------------------------------------------- /tests/tiny_model_configs/vl_model_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/tiny_model_configs/vl_model_config.json -------------------------------------------------------------------------------- /tests/triton_wrapper_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tests/triton_wrapper_test.py -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/CMakeLists.txt -------------------------------------------------------------------------------- /tools/deepep_wrapper/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/CMakeLists.txt -------------------------------------------------------------------------------- /tools/deepep_wrapper/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/README.md -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/CMakeLists.txt -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/common.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/common.cpp -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/common.h -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/deep_ep.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/deep_ep.cpp -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/deep_ep.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/deep_ep.h -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/main.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/main.cpp -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/process.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/process.cpp -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/wrapper.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/wrapper.cpp -------------------------------------------------------------------------------- /tools/deepep_wrapper/src/wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/deepep_wrapper/src/wrapper.h -------------------------------------------------------------------------------- /tools/eplb/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/eplb/README.md -------------------------------------------------------------------------------- /tools/eplb/expert_activation_heatmap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/eplb/expert_activation_heatmap.py -------------------------------------------------------------------------------- /tools/eplb/expert_parallel_get_map.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/eplb/expert_parallel_get_map.py -------------------------------------------------------------------------------- /tools/generate_triton_kernel_cubin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/generate_triton_kernel_cubin.sh -------------------------------------------------------------------------------- /tools/get_nvidia_gpu_properties.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/get_nvidia_gpu_properties.py -------------------------------------------------------------------------------- /tools/profiler/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/profiler/README.md -------------------------------------------------------------------------------- /tools/profiler/README_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/profiler/README_cn.md -------------------------------------------------------------------------------- /tools/profiler/sched_event_trace_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tencent/KsanaLLM/HEAD/tools/profiler/sched_event_trace_process.py --------------------------------------------------------------------------------