├── .clang-format ├── .devcontainer ├── cu126 │ └── devcontainer.json ├── cu128 │ └── devcontainer.json ├── cu129 │ └── devcontainer.json └── cu130 │ └── devcontainer.json ├── .github ├── CODEOWNERS ├── pull_request_template.md └── workflows │ ├── build-doc.yml │ ├── new-issue.yml │ ├── nightly-release.yml │ ├── pre-commit.yml │ ├── release-ci-docker.yml │ ├── release.yml │ └── update-codeowners.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Jenkinsfile ├── LICENSE ├── NOTICE ├── README.md ├── benchmarks ├── README.md ├── bench_append_paged_kv_cache.py ├── bench_append_paged_mla_kv_cache.py ├── bench_attention_sink_triton_sgl_context.py ├── bench_attention_sink_triton_sgl_decode.py ├── bench_batch_attention.py ├── bench_batch_decode.py ├── bench_blackwell_attention.py ├── bench_block_sparse_attention.py ├── bench_cute_dsl_blockscaled_gemm.py ├── bench_cutlass_fused_moe.py ├── bench_deepgemm_blackwell.py ├── bench_deepseek_mla.py ├── bench_fused_add_rmsnorm.py ├── bench_groupwise_gemm_fp8_blackwell.py ├── bench_groupwise_grouped_gemm_fp8_blackwell.py ├── bench_groupwise_grouped_gemm_mxfp4_blackwell.py ├── bench_hopper_attention.py ├── bench_hopper_fp8_attention.py ├── bench_hopper_grouped_gemm.py ├── bench_logging_overhead.py ├── bench_mixed_attention.py ├── bench_mm_fp8.py ├── bench_persistent_gemm.py ├── bench_renorm.py ├── bench_rope.py ├── bench_rope_quantize_fp8.py ├── bench_rope_quantize_fp8_append_cache.py ├── bench_sampling.py ├── bench_sliding_window.py ├── bench_softmax.py ├── bench_tgv_gemm.py ├── bench_trtllm_fmha.py ├── bench_trtllm_gen_fused_moe_autotuner.py ├── bench_trtllm_gen_mla.py ├── flashinfer_benchmark.py ├── routines │ ├── __init__.py │ ├── attention.py │ ├── flashinfer_benchmark_utils.py │ ├── gemm.py │ └── moe.py ├── samples │ ├── sample_testlist.txt │ ├── sample_testlist_output.csv │ └── sample_testlist_output.txt └── test_flashinfer_benchmark.py ├── ci ├── bash.sh ├── docker-tags.yml └── scripts │ └── jenkins │ ├── git_skip_ci.py │ ├── git_skip_ci_globs.py │ ├── git_utils.py │ └── retry.sh ├── csrc ├── batch_attention.cu ├── batch_attention_customize_config.jinja ├── batch_attention_jit_binding.cu ├── batch_attention_paged_kernel_inst.jinja ├── batch_decode.cu ├── batch_decode_customize_config.jinja ├── batch_decode_jit_binding.cu ├── batch_decode_kernel_inst.jinja ├── batch_decode_mla_binding.cu ├── batch_decode_mla_config.jinja ├── batch_decode_mla_cute_sm80.cu ├── batch_decode_mla_plan.cu ├── batch_decode_mla_run.cu ├── batch_mla_binding.cu ├── batch_mla_config.jinja ├── batch_mla_plan.cu ├── batch_mla_run.cu ├── batch_mla_sm90_binding.cu ├── batch_mla_sm90_plan.cu ├── batch_mla_sm90_run.cu ├── batch_pod.cu ├── batch_pod_customize_config.jinja ├── batch_pod_jit_binding.cu ├── batch_pod_kernel_inst.jinja ├── batch_prefill.cu ├── batch_prefill_customize_config.jinja ├── batch_prefill_fp8_paged_sm90_kernel_inst.jinja ├── batch_prefill_fp8_ragged_sm90_kernel_inst.jinja ├── batch_prefill_fp8_sm90.cu ├── batch_prefill_jit_binding.cu ├── batch_prefill_paged_kernel_inst.jinja ├── batch_prefill_paged_sm90_kernel_inst.jinja ├── batch_prefill_ragged_kernel_inst.jinja ├── batch_prefill_ragged_sm90_kernel_inst.jinja ├── batch_prefill_sm90.cu ├── batch_prefill_sm90_customize_config.jinja ├── batch_prefill_sm90_jit_binding.cu ├── blackwell_fmha_plan.cu ├── bmm_fp8.cu ├── cascade.cu ├── cudnn_sdpa_kernel_launcher.cu ├── cudnn_sdpa_utils.h ├── cutlass_mla.cu ├── dsv3_router_gemm.cu ├── flashinfer_cascade_binding.cu ├── flashinfer_gemm_binding.cu ├── flashinfer_gemm_sm90_binding.cu ├── flashinfer_mla_binding.cu ├── flashinfer_norm_binding.cu ├── flashinfer_page_binding.cu ├── flashinfer_quantization_binding.cu ├── flashinfer_rope_binding.cu ├── flashinfer_sampling_binding.cu ├── flashinfer_xqa_binding.cu ├── fmhaReduction.cu ├── fmha_cutlass_sm100.cu ├── fmha_cutlass_sm100_binding.cu ├── fmha_v2 │ ├── convert.cu │ ├── fmha │ │ ├── alibi_params.h │ │ ├── fragment.h │ │ ├── gemm.h │ │ ├── gmem_tile_o.h │ │ ├── gmem_tile_o_packed.h │ │ ├── gmem_tile_ps.h │ │ ├── gmem_tile_qkv.h │ │ ├── gmem_tile_qkv_packed.h │ │ ├── hopper │ │ │ ├── arrive_wait.h │ │ │ ├── compute_tile.h │ │ │ ├── fragment.h │ │ │ ├── gmem_tile_o_packed.h │ │ │ ├── gmem_tile_qkv_packed.h │ │ │ ├── gmma_descriptor.h │ │ │ ├── kernel_traits.h │ │ │ ├── smem_tile.h │ │ │ ├── smem_tile_o.h │ │ │ ├── tma_descriptor.h │ │ │ ├── tma_types.h │ │ │ ├── utils_gmma.h │ │ │ ├── utils_hgmma.h │ │ │ ├── utils_hgmma_bf16.h │ │ │ ├── utils_igmma.h │ │ │ ├── utils_qgmma.h │ │ │ ├── utils_tma.h │ │ │ └── utils_warpgroup.h │ │ ├── kernel_traits.h │ │ ├── mask.h │ │ ├── numeric_types.h │ │ ├── paged_kv_cache.h │ │ ├── smem_tile.h │ │ ├── smem_tile_o.h │ │ ├── smem_tile_qkv.h │ │ ├── smem_tile_v.h │ │ ├── softmax.h │ │ ├── traits.h │ │ ├── utils.h │ │ └── warpspec │ │ │ ├── circular_buffer.h │ │ │ ├── compute.h │ │ │ ├── dma.h │ │ │ ├── epilogue.h │ │ │ └── kernel_traits.h │ ├── fused_multihead_attention.cpp │ ├── fused_multihead_attention.h │ ├── fused_multihead_attention_demo_bert_params.h │ ├── fused_multihead_attention_kernel.h │ ├── fused_multihead_attention_kernel_1xN.h │ ├── fused_multihead_attention_kernel_1xN_multi_cta.h │ ├── fused_multihead_attention_kernel_1xN_noloop.h │ ├── fused_multihead_attention_kernel_2x2.h │ ├── fused_multihead_attention_kernel_4x1_hopper.h │ ├── fused_multihead_attention_kernel_4x1_hopper_noloop.h │ ├── fused_multihead_attention_kernel_4xN_hopper.h │ ├── fused_multihead_attention_kernel_4xN_hopper_noloop.h │ ├── fused_multihead_attention_utils.h │ ├── fused_multihead_cross_attention.cpp │ ├── fused_multihead_cross_attention.h │ ├── fused_multihead_cross_attention_kernel_1xN.h │ ├── fused_multihead_cross_attention_kernel_1xN_noloop.h │ ├── fused_multihead_flash_attention_kernel.h │ ├── fused_multihead_flash_attention_kernel_noloop.h │ ├── fused_multihead_flash_attention_kernel_noloop_tiled.h │ ├── softmax_bf16.cu │ ├── softmax_fp16.cu │ ├── softmax_fp32.cu │ ├── softmax_fp8.cu │ ├── softmax_impl.h │ └── softmax_int8.cu ├── fp4_gemm_cutlass.cu ├── fp4_gemm_cutlass.jinja ├── fp4_gemm_cutlass_sm120.cu ├── fp4_gemm_cutlass_sm120.jinja ├── fp8_gemm_cutlass.cu ├── fp8_gemm_cutlass.jinja ├── fused_moe │ ├── cutlass_backend │ │ ├── cutlass_fused_moe_instantiation.cu │ │ ├── cutlass_fused_moe_kernels.cuh │ │ ├── deepgemm_jit_setup.cu │ │ └── flashinfer_cutlass_fused_moe_binding.cu │ ├── moeTopKFuncs.cuh │ └── noAuxTcKernels.cu ├── gemm_groupwise_sm100.cu ├── gemm_groupwise_sm100_kernel_inst.jinja ├── gemm_groupwise_sm120.cu ├── gemm_groupwise_sm120_kernel_inst.jinja ├── gemm_sm100_binding.cu ├── gemm_sm120_binding.cu ├── group_gemm.cu ├── group_gemm_fp8_groupwise_sm100.cu ├── group_gemm_fp8_groupwise_sm100_kernel_inst.jinja ├── group_gemm_fp8_groupwise_sm120.cu ├── group_gemm_fp8_groupwise_sm120_kernel_inst.jinja ├── group_gemm_mxfp4_groupwise_sm100.cu ├── group_gemm_mxfp4_groupwise_sm100_kernel_inst.jinja ├── group_gemm_sm100_binding.cu ├── group_gemm_sm120_binding.cu ├── group_gemm_sm90.cu ├── group_gemm_sm90_kernel_inst.jinja ├── logging.cc ├── norm.cu ├── nv_internal │ ├── cpp │ │ ├── common │ │ │ ├── envUtils.cpp │ │ │ ├── logger.cpp │ │ │ ├── memoryUtils.cu │ │ │ ├── stringUtils.cpp │ │ │ └── tllmException.cpp │ │ └── kernels │ │ │ └── quantization.cu │ ├── include │ │ └── tensorrt_llm │ │ │ └── common │ │ │ ├── NvInferRuntime.h │ │ │ ├── assert.h │ │ │ ├── cudaBf16Wrapper.h │ │ │ ├── cudaFp8Utils.h │ │ │ ├── cudaUtils.h │ │ │ ├── dataType.h │ │ │ ├── logger.h │ │ │ ├── quantization.h │ │ │ ├── stringUtils.h │ │ │ └── tllmException.h │ └── tensorrt_llm │ │ ├── common │ │ ├── cublasMMWrapper.h │ │ ├── cudaBf16Fallbacks.cuh │ │ ├── cudaDriverWrapper.h │ │ ├── cudaTypeUtils.cuh │ │ ├── envUtils.h │ │ ├── memoryUtils.h │ │ ├── quantTypeUtils.cuh │ │ ├── reduceKernelUtils.cuh │ │ └── workspace.h │ │ ├── cutlass_extensions │ │ └── include │ │ │ └── cutlass_extensions │ │ │ ├── arch │ │ │ ├── copy_red_global.hpp │ │ │ ├── copy_sm90_multimem.hpp │ │ │ ├── copy_traits_sm90_multimem.hpp │ │ │ ├── grid_dependency_control.h │ │ │ └── mma.h │ │ │ ├── communication │ │ │ └── collective │ │ │ │ └── sm90_allreduce_nvls_warpspecialized.hpp │ │ │ ├── compute_occupancy.h │ │ │ ├── detail │ │ │ └── collective │ │ │ │ └── mixed_input_utils.hpp │ │ │ ├── epilogue │ │ │ ├── collective │ │ │ │ └── epilogue_moe_finalize.hpp │ │ │ ├── fusion │ │ │ │ ├── sm90_visitor_allreduce_tma_warpspecialized.hpp │ │ │ │ └── sm90_visitor_scatter.hpp │ │ │ └── thread │ │ │ │ └── fused_activations.h │ │ │ ├── epilogue_helpers.h │ │ │ ├── gemm │ │ │ ├── collective │ │ │ │ ├── builders │ │ │ │ │ ├── sm90_gmma_builder_gated.inl │ │ │ │ │ ├── sm90_gmma_builder_interleaved.inl │ │ │ │ │ └── sm90_gmma_builder_mixed_input.inl │ │ │ │ ├── collective_builder_gated.hpp │ │ │ │ ├── collective_builder_interleaved.hpp │ │ │ │ ├── collective_builder_mixed_input.hpp │ │ │ │ ├── collective_mma_array_mixed_input.hpp │ │ │ │ ├── collective_mma_gated.hpp │ │ │ │ ├── collective_mma_interleaved.hpp │ │ │ │ ├── sm90_mma_array_tma_gmma_rs_warpspecialized_mixed_input_.hpp │ │ │ │ ├── sm90_mma_gated_tma_gmma_ss_warpspecialized.hpp │ │ │ │ ├── sm90_mma_gated_tma_gmma_ss_warpspecialized_fp8.hpp │ │ │ │ └── sm90_mma_interleaved_tma_gmma_rs_warpspecialized_mixed_input.hpp │ │ │ ├── kernel │ │ │ │ ├── default_fpA_intB_traits.h │ │ │ │ ├── fused_moe_kernel.cuh │ │ │ │ ├── fused_moe_kernel_routine.cuh │ │ │ │ ├── fused_moe_kernel_traits.cuh │ │ │ │ ├── gemm_moe_problem_visitor.h │ │ │ │ ├── gemm_universal_allreduce.hpp │ │ │ │ ├── mixed_gemm_B_layout.h │ │ │ │ ├── moe_cute_util.cuh │ │ │ │ ├── moe_cutlass_kernel.h │ │ │ │ ├── moe_problem_visitor.h │ │ │ │ ├── sm90_gemm_allreduce_tma_warpspecialized.hpp │ │ │ │ └── sm90_gemm_allreduce_tma_warpspecialized_pingpong.hpp │ │ │ ├── threadblock │ │ │ │ ├── default_dq_mma.h │ │ │ │ ├── default_dq_mma_multistage.h │ │ │ │ ├── default_dq_mma_pipelined.h │ │ │ │ ├── default_mma.h │ │ │ │ ├── default_mma_bf16.h │ │ │ │ ├── dq_mma_base.h │ │ │ │ ├── dq_mma_multistage.h │ │ │ │ ├── dq_mma_multistage_finegrained.h │ │ │ │ ├── dq_mma_multistage_percol.h │ │ │ │ ├── dq_mma_pipelined.h │ │ │ │ ├── dq_mma_pipelined_finegrained.h │ │ │ │ └── dq_mma_pipelined_percol.h │ │ │ └── warp │ │ │ │ ├── default_mma_tensor_op.h │ │ │ │ ├── mma_tensorop_compute_B_with_f16.h │ │ │ │ └── mma_tensorop_dequantizer.h │ │ │ ├── gemm_configs.h │ │ │ ├── interleaved_numeric_conversion.h │ │ │ ├── system_barrier.h │ │ │ ├── tile_interleaved_layout.h │ │ │ ├── transform │ │ │ └── threadblock │ │ │ │ └── fine_grained_scale_zero_iterator.h │ │ │ ├── util │ │ │ └── gather_tensor.hpp │ │ │ └── weight_only_quant_op.h │ │ ├── deep_gemm │ │ ├── compiler.cuh │ │ ├── fp8_gemm.cuh │ │ ├── fp8_gemm_impl.cuh │ │ ├── jit_utils.cuh │ │ ├── mma_utils.cuh │ │ ├── nvrtc_cutlass.cuh │ │ ├── nvrtc_std.cuh │ │ ├── runtime.cuh │ │ ├── scheduler.cuh │ │ ├── tma_utils.cuh │ │ └── utils.cuh │ │ ├── kernels │ │ ├── cutlass_kernels │ │ │ ├── cutlass_heuristic.cpp │ │ │ ├── cutlass_heuristic.h │ │ │ ├── cutlass_type_conversion.h │ │ │ ├── fp8_blockscale_gemm │ │ │ │ ├── ada_blockwise_gemm │ │ │ │ │ ├── sm89_fp8_gemm_1d1d.cuh │ │ │ │ │ └── sm89_utils.cuh │ │ │ │ ├── fp8_blockscale_gemm.cu │ │ │ │ ├── fp8_blockscale_gemm.h │ │ │ │ ├── fp8_blockscale_gemm_kernel.cuh │ │ │ │ ├── fp8_blockscale_mma_utils.cuh │ │ │ │ └── fp8_blockscale_tma_utils.cuh │ │ │ ├── fpA_intB_gemm │ │ │ │ ├── bf16_int4_gemm_fg_scalebias.cu │ │ │ │ ├── bf16_int4_gemm_fg_scaleonly.cu │ │ │ │ ├── bf16_int4_gemm_per_col.cu │ │ │ │ ├── bf16_int8_gemm_fg_scalebias.cu │ │ │ │ ├── bf16_int8_gemm_fg_scaleonly.cu │ │ │ │ ├── bf16_int8_gemm_per_col.cu │ │ │ │ ├── e4m3_int4_gemm_fg_scalebias_bf16_out_bf16.cu │ │ │ │ ├── e4m3_int4_gemm_fg_scalebias_f16_out_f16.cu │ │ │ │ ├── e4m3_int4_gemm_fg_scaleonly_bf16_out_bf16.cu │ │ │ │ ├── e4m3_int4_gemm_fg_scaleonly_f16_out_f16.cu │ │ │ │ ├── e4m3_int4_gemm_per_col_f16_out_f16.cu │ │ │ │ ├── fp16_int4_gemm_fg_scalebias.cu │ │ │ │ ├── fp16_int4_gemm_fg_scaleonly.cu │ │ │ │ ├── fp16_int4_gemm_per_col.cu │ │ │ │ ├── fp16_int8_gemm_fg_scalebias.cu │ │ │ │ ├── fp16_int8_gemm_fg_scaleonly.cu │ │ │ │ ├── fp16_int8_gemm_per_col.cu │ │ │ │ ├── fpA_intB_gemm.h │ │ │ │ ├── fpA_intB_gemm_template.h │ │ │ │ ├── fpA_intB_gemm_template_sm90.h │ │ │ │ └── launchers │ │ │ │ │ ├── fpA_intB_launcher_sm90.h │ │ │ │ │ └── fpA_intB_launcher_sm90.inl │ │ │ ├── include │ │ │ │ ├── common.h │ │ │ │ ├── cutlass_kernel_selector.h │ │ │ │ ├── moe_gemm_kernels.h │ │ │ │ ├── moe_kernels.h │ │ │ │ └── moe_util_kernels.h │ │ │ └── moe_gemm │ │ │ │ ├── launchers │ │ │ │ ├── fused_moe_gemm_launcher_sm80.h │ │ │ │ ├── fused_moe_gemm_launcher_sm80.inl │ │ │ │ ├── moe_gemm_tma_ws_launcher.h │ │ │ │ ├── moe_gemm_tma_ws_launcher.inl │ │ │ │ ├── moe_gemm_tma_ws_mixed_input_launcher.h │ │ │ │ └── moe_gemm_tma_ws_mixed_input_launcher.inl │ │ │ │ ├── moe_gemm_kernels_bf16_bf16.cu │ │ │ │ ├── moe_gemm_kernels_bf16_fp4.cu │ │ │ │ ├── moe_gemm_kernels_bf16_fp8.cu │ │ │ │ ├── moe_gemm_kernels_bf16_uint4.cu │ │ │ │ ├── moe_gemm_kernels_bf16_uint8.cu │ │ │ │ ├── moe_gemm_kernels_fp16_fp16.cu │ │ │ │ ├── moe_gemm_kernels_fp16_fp4.cu │ │ │ │ ├── moe_gemm_kernels_fp16_uint4.cu │ │ │ │ ├── moe_gemm_kernels_fp16_uint8.cu │ │ │ │ ├── moe_gemm_kernels_fp32_fp32.cu │ │ │ │ ├── moe_gemm_kernels_fp4_fp4.cu │ │ │ │ ├── moe_gemm_kernels_fp8_fp4.cu │ │ │ │ ├── moe_gemm_kernels_fp8_fp8.cu │ │ │ │ ├── moe_gemm_kernels_fp8_uint4.cu │ │ │ │ ├── moe_gemm_template_dispatch.h │ │ │ │ ├── moe_gemm_template_dispatch_tma_ws.h │ │ │ │ ├── moe_gemm_template_dispatch_tma_ws_mixed_dtype.h │ │ │ │ ├── moe_gemm_tma_warp_specialized_input.cu │ │ │ │ └── moe_tma_warp_specialized_traits.h │ │ ├── delayStream.cu │ │ ├── delayStream.h │ │ ├── lora │ │ │ ├── lora.cpp │ │ │ └── lora.h │ │ ├── preQuantScaleKernel.cu │ │ ├── preQuantScaleKernel.h │ │ ├── quantization.cuh │ │ └── quantization.h │ │ └── thop │ │ ├── fp4Op.cpp │ │ ├── fp4Quantize.cpp │ │ ├── fp4Quantize.h │ │ ├── fp8Quantize.cpp │ │ ├── fp8Quantize.h │ │ └── utils.h ├── nvshmem_binding.cu ├── page.cu ├── pod.cu ├── pod_customize_config.jinja ├── pod_jit_binding.cu ├── pod_kernel_inst.jinja ├── quantization.cu ├── renorm.cu ├── rope.cu ├── runtime_utils.h ├── sampling.cu ├── sampling_utils.h ├── single_decode.cu ├── single_decode_customize_config.jinja ├── single_decode_jit_binding.cu ├── single_decode_kernel_inst.jinja ├── single_prefill.cu ├── single_prefill_customize_config.jinja ├── single_prefill_fp8_sm90.cu ├── single_prefill_fp8_sm90_kernel_inst.jinja ├── single_prefill_jit_binding.cu ├── single_prefill_kernel_inst.jinja ├── single_prefill_sm90.cu ├── single_prefill_sm90_customize_config.jinja ├── single_prefill_sm90_jit_binding.cu ├── single_prefill_sm90_kernel_inst.jinja ├── tgv_gemm.cu ├── tgv_gemm.jinja ├── trtllm_allreduce.cu ├── trtllm_allreduce_fusion.cu ├── trtllm_alltoall.cu ├── trtllm_alltoall_prepare.cu ├── trtllm_batched_gemm_runner.cu ├── trtllm_fmha_kernel_launcher.cu ├── trtllm_fmha_v2_binding.cu ├── trtllm_fused_moe_dev_kernel.cu ├── trtllm_fused_moe_kernel_launcher.cu ├── trtllm_fused_moe_routing_deepseek.cu ├── trtllm_fused_moe_routing_llama4.cu ├── trtllm_fused_moe_routing_renormalize.cu ├── trtllm_fused_moe_runner.cu ├── trtllm_gemm_runner.cu ├── trtllm_low_latency_gemm_runner.cu ├── trtllm_mnnvl_allreduce.cu ├── trtllm_moe_allreduce_fusion.cu ├── tvm_ffi_utils.h ├── vllm_custom_all_reduce.cu └── xqa │ ├── barriers.cuh │ ├── cuda_hint.cuh │ ├── defines.h │ ├── gmma.cuh │ ├── gmma_impl.cuh │ ├── hostUtils.h │ ├── ldgsts.cuh │ ├── mha.cu │ ├── mha.h │ ├── mhaUtils.cuh │ ├── mha_components.cuh │ ├── mha_sm90.cu │ ├── mha_stdheaders.cuh │ ├── mla_sm120.cu │ ├── mla_sm120.cuh │ ├── mma.cuh │ ├── platform.h │ ├── specDec.h │ ├── tensorMap.cpp │ ├── tensorMap.h │ ├── tma.h │ ├── utils.cuh │ ├── utils.h │ └── xqa_wrapper.cu ├── docker ├── Dockerfile.cu126 ├── Dockerfile.cu126.dev ├── Dockerfile.cu128 ├── Dockerfile.cu128.dev ├── Dockerfile.cu129 ├── Dockerfile.cu129.dev ├── Dockerfile.cu130 ├── Dockerfile.cu130.dev ├── bash.sh └── install │ ├── install_python.sh │ └── install_python_packages.sh ├── docs ├── .gitignore ├── Makefile ├── _static │ ├── FlashInfer-black-background.png │ └── FlashInfer-white-background.png ├── api │ ├── activation.rst │ ├── attention.rst │ ├── cascade.rst │ ├── comm.rst │ ├── fp4_quantization.rst │ ├── fused_moe.rst │ ├── gemm.rst │ ├── green_ctx.rst │ ├── logits_processor.rst │ ├── norm.rst │ ├── page.rst │ ├── quantization.rst │ ├── rope.rst │ ├── sampling.rst │ ├── sparse.rst │ └── testing.rst ├── build_docs.sh ├── conf.py ├── index.rst ├── installation.rst ├── logging.rst ├── make.bat ├── requirements.txt ├── tutorials │ ├── kv_layout.rst │ └── recursive_attention.rst └── wrap_run_llm.py ├── flashinfer-cubin ├── .gitignore ├── build_backend.py ├── flashinfer_cubin │ └── __init__.py └── pyproject.toml ├── flashinfer-jit-cache ├── .gitignore ├── build_backend.py ├── flashinfer_jit_cache │ └── __init__.py └── pyproject.toml ├── flashinfer ├── __init__.py ├── __main__.py ├── activation.py ├── aot.py ├── api_logging.py ├── artifacts.py ├── attention.py ├── autotuner.py ├── cascade.py ├── comm │ ├── __init__.py │ ├── cuda_ipc.py │ ├── dlpack_utils.py │ ├── mapping.py │ ├── mnnvl.py │ ├── nvshmem.py │ ├── nvshmem_allreduce.py │ ├── trtllm_alltoall.py │ ├── trtllm_ar.py │ ├── trtllm_mnnvl_ar.py │ └── vllm_ar.py ├── compilation_context.py ├── cuda_utils.py ├── cudnn │ ├── __init__.py │ ├── decode.py │ ├── prefill.py │ └── utils.py ├── cute_dsl │ ├── blockscaled_gemm.py │ ├── gemm_allreduce_two_shot.py │ └── utils.py ├── decode.py ├── deep_gemm.py ├── dsv3_ops │ └── __init__.py ├── fp4_quantization.py ├── fp8_quantization.py ├── fused_moe │ ├── __init__.py │ ├── core.py │ ├── fused_routing_dsv3.py │ └── utils.py ├── gemm │ ├── __init__.py │ ├── gemm_base.py │ └── routergemm_dsv3.py ├── green_ctx.py ├── jit │ ├── __init__.py │ ├── activation.py │ ├── attention │ │ ├── __init__.py │ │ ├── fmha_v2 │ │ │ ├── generate_kernels.py │ │ │ └── generator_utils.py │ │ ├── modules.py │ │ ├── utils.py │ │ └── variants.py │ ├── cascade.py │ ├── comm.py │ ├── core.py │ ├── cpp_ext.py │ ├── cubin_loader.py │ ├── dsv3_optimizations.py │ ├── env.py │ ├── fp4_quantization.py │ ├── fp8_quantization.py │ ├── fused_moe.py │ ├── gemm │ │ ├── __init__.py │ │ ├── core.py │ │ ├── cutlass │ │ │ ├── __init__.py │ │ │ ├── cutlass_library.py │ │ │ └── generate_kernels.py │ │ └── deepgemm.py │ ├── mla.py │ ├── norm.py │ ├── page.py │ ├── quantization.py │ ├── rope.py │ ├── sampling.py │ ├── spdlog.py │ ├── tllm_utils.py │ ├── utils.py │ └── xqa.py ├── logits_processor │ ├── __init__.py │ ├── compiler.py │ ├── fusion_rules.py │ ├── legalization.py │ ├── op.py │ ├── operators.py │ ├── pipeline.py │ ├── processors.py │ ├── types.py │ └── validators.py ├── mla.py ├── norm.py ├── page.py ├── pod.py ├── prefill.py ├── profiler │ └── __init__.py ├── py.typed ├── quantization.py ├── rope.py ├── sampling.py ├── sparse.py ├── testing │ ├── __init__.py │ └── utils.py ├── tllm_utils.py ├── triton │ ├── __init__.py │ ├── activation.py │ ├── cascade.py │ ├── gemm.py │ ├── kernels │ │ ├── __init__.py │ │ ├── activation.py │ │ ├── cascade.py │ │ ├── norm.py │ │ ├── quant.py │ │ └── sm_constraint_gemm.py │ ├── norm.py │ ├── page.py │ ├── sm_constraint_gemm.py │ └── utils.py ├── trtllm_low_latency_gemm.py ├── tuning_configs │ ├── v0_1_trtllm_fused_moe_NVIDIA_B200.py │ └── v0_1_trtllm_fused_moe_NVIDIA_GB200.py ├── utils.py ├── version.py └── xqa.py ├── include └── flashinfer │ ├── activation.cuh │ ├── allocator.h │ ├── arch_condition.h │ ├── attention │ ├── batch_pod.cuh │ ├── blackwell │ │ ├── collective │ │ │ ├── fmha_common.hpp │ │ │ ├── fmha_fusion.hpp │ │ │ ├── sm100_fmha_fwd_epilogue_tma_warpspecialized.hpp │ │ │ ├── sm100_fmha_fwd_mainloop_tma_warpspecialized.hpp │ │ │ ├── sm100_fmha_gen_epilogue_warpspecialized.hpp │ │ │ ├── sm100_fmha_gen_mainloop_warpspecialized.hpp │ │ │ ├── sm100_fmha_load_cpasync_warpspecialized.hpp │ │ │ └── sm100_fmha_load_tma_warpspecialized.hpp │ │ ├── common │ │ │ └── pow_2.hpp │ │ ├── device │ │ │ ├── fmha.hpp │ │ │ └── sm100_mla.hpp │ │ ├── fmha_cutlass_sm100.cuh │ │ ├── kernel │ │ │ ├── fmha_options.hpp │ │ │ ├── fmha_tile_scheduler.hpp │ │ │ ├── gather_tensor.hpp │ │ │ ├── sm100_fmha_fwd_kernel_tma_warpspecialized.hpp │ │ │ ├── sm100_fmha_gen_kernel_warpspecialized.hpp │ │ │ ├── sm100_fmha_mla_reduction.hpp │ │ │ ├── sm100_fmha_mla_tma_warpspecialized.hpp │ │ │ └── sm100_mla_tile_scheduler.hpp │ │ └── plan.cuh │ ├── cascade.cuh │ ├── cutlass_mla.cuh │ ├── decode.cuh │ ├── decode_mla_cute_sm80.cuh │ ├── default_decode_params.cuh │ ├── default_prefill_params.cuh │ ├── heap.h │ ├── hopper.cuh │ ├── hopper │ │ ├── attention_updater.cuh │ │ ├── block_sparse_gather.cuh │ │ ├── default_params.cuh │ │ ├── epilogue.cuh │ │ ├── kernel_traits.cuh │ │ ├── mainloop.cuh │ │ ├── mainloop_mma.cuh │ │ ├── named_barrier.cuh │ │ ├── prefill_sm90.cuh │ │ ├── quantization │ │ │ ├── epilogue.cuh │ │ │ ├── kernel_traits.cuh │ │ │ ├── mainloop_load.cuh │ │ │ ├── mainloop_mma.cuh │ │ │ ├── mainloop_sparse_load.cuh │ │ │ └── prefill_sm90.cuh │ │ ├── sparse_mainloop.cuh │ │ ├── tile_scheduler.cuh │ │ ├── utils.cuh │ │ ├── variant_helper.cuh │ │ └── variants.cuh │ ├── mask.cuh │ ├── mla.cuh │ ├── mla_hopper.cuh │ ├── mla_params.cuh │ ├── persistent.cuh │ ├── persistent_template.cuh │ ├── pod.cuh │ ├── prefill.cuh │ ├── scheduler.cuh │ ├── state.cuh │ ├── variant_helper.cuh │ └── variants.cuh │ ├── attention_impl.cuh │ ├── comm │ ├── trtllm_allreduce.cuh │ ├── trtllm_allreduce_fusion.cuh │ ├── trtllm_alltoall.cuh │ ├── trtllm_alltoall_prepare.cuh │ ├── trtllm_mnnvl_allreduce.cuh │ ├── trtllm_moe_allreduce_fusion.cuh │ └── vllm_custom_all_reduce.cuh │ ├── cp_async.cuh │ ├── cubin_loader.h │ ├── cutlass_utils.cuh │ ├── exception.h │ ├── fastdiv.cuh │ ├── fp16.h │ ├── fp4_layout.cuh │ ├── frag_layout_swizzle.cuh │ ├── gemm │ ├── bmm_fp8.cuh │ ├── cutlass_gemm_configs.h │ ├── dsv3_router_gemm.cuh │ ├── fp4_gemm_cutlass.h │ ├── fp4_gemm_cutlass_template.h │ ├── fp4_gemm_cutlass_template_sm120.h │ ├── fp4_gemm_template_sm100.h │ ├── fp4_gemm_template_sm120.h │ ├── fp8_gemm_cutlass.h │ ├── fp8_gemm_cutlass_template.h │ ├── fp8_gemm_template_sm100.h │ ├── gemm_groupwise_sm100.cuh │ ├── gemm_groupwise_sm120.cuh │ ├── group_gemm.cuh │ ├── group_gemm_fp8_groupwise_sm100.cuh │ ├── group_gemm_fp8_groupwise_sm120.cuh │ ├── group_gemm_lora.cuh │ ├── group_gemm_mxfp4_groupwise_sm100.cuh │ ├── group_gemm_sm90.cuh │ ├── group_gemv.cuh │ ├── tgv_gemm.cuh │ ├── tgv_gemm_configs.h │ └── tgv_gemm_template.h │ ├── layout.cuh │ ├── logging.h │ ├── math.cuh │ ├── mma.cuh │ ├── norm.cuh │ ├── page.cuh │ ├── permuted_smem.cuh │ ├── pos_enc.cuh │ ├── profiler.cuh │ ├── quantization.cuh │ ├── sampling.cuh │ ├── trtllm │ ├── batched_gemm │ │ ├── KernelRunner.h │ │ └── trtllmGen_bmm_export │ │ │ ├── BatchedGemmEnums.h │ │ │ ├── BatchedGemmInterface.h │ │ │ ├── BatchedGemmOptions.h │ │ │ ├── Enums.h │ │ │ ├── GemmGatedActOptions.h │ │ │ ├── GemmOptions.h │ │ │ ├── KernelParams.h │ │ │ ├── KernelParamsDecl.h │ │ │ ├── KernelTraits.h │ │ │ ├── TmaDescriptor.h │ │ │ └── trtllm │ │ │ └── gen │ │ │ ├── CommonUtils.h │ │ │ ├── CudaKernelLauncher.h │ │ │ ├── DtypeDecl.h │ │ │ ├── MmaDecl.h │ │ │ └── SfLayoutDecl.h │ ├── common.h │ ├── common │ │ ├── cudaBf16Fallbacks.cuh │ │ ├── cudaBf16Wrapper.h │ │ ├── cudaFp8Utils.h │ │ ├── cudaTypeUtils.cuh │ │ ├── cudaUtils.h │ │ └── reduceKernelUtils.cuh │ ├── fmha │ │ ├── decoder_impl_common.h │ │ ├── decoder_params.h │ │ ├── fmhaKernels.cuh │ │ ├── fmhaReduction.h │ │ ├── fmhaRunner.cuh │ │ ├── fmhaRunnerParams.h │ │ ├── kernelParams.h │ │ ├── kernelUtils.h │ │ └── lse.cuh │ ├── fused_moe │ │ ├── DevKernel.h │ │ ├── IntFastDiv.h │ │ ├── RoutingKernel.cuh │ │ ├── RoutingKernel.h │ │ ├── RoutingKernelTopK.cuh │ │ ├── noAuxTcKernels.h │ │ └── runner.h │ └── gemm │ │ └── trtllmGen_gemm_export │ │ ├── Enums.h │ │ ├── GemmInterface.h │ │ ├── GemmOptions.h │ │ ├── KernelParams.h │ │ ├── KernelParamsDecl.h │ │ ├── KernelTraits.h │ │ ├── TmaDescriptor.h │ │ └── trtllm │ │ └── gen │ │ ├── CommonUtils.h │ │ ├── CudaKernelLauncher.h │ │ ├── DtypeDecl.h │ │ ├── MmaDecl.h │ │ └── SfLayoutDecl.h │ ├── utils.cuh │ └── vec_dtypes.cuh ├── licenses ├── LICENSE.cutlass.txt ├── LICENSE.flashattention3.txt ├── LICENSE.fmt.txt └── LICENSE.spdlog.txt ├── profiler ├── .gitignore ├── README.md ├── batch_attention.py └── mla.py ├── pyproject.toml ├── pytest.ini ├── requirements.txt ├── scripts ├── authorized_codeowner.txt ├── build_flashinfer_jit_cache_whl.sh ├── ci-flashinfer.env.example ├── ci-flashinfer.service ├── codeowner_analyzer.py ├── print_jit_cache_summary.py ├── task_cpplint.sh ├── task_jit_run_tests_part1.sh ├── task_jit_run_tests_part2.sh ├── task_jit_run_tests_part3.sh ├── task_jit_run_tests_part4.sh ├── task_jit_run_tests_part5.sh ├── task_lint.sh ├── task_mypy.sh ├── task_pylint.sh ├── task_show_node_info.sh ├── task_test_blackwell_kernels.sh ├── task_test_jit_cache_package_build_import.sh ├── task_test_multi_node_comm_kernels.sh ├── task_test_nightly_build.sh ├── task_test_single_node_comm_kernels.sh ├── update_whl_index.py └── verify_all_modules_compiled.py ├── tests ├── __init__.py ├── attention │ ├── __init__.py │ ├── test_alibi.py │ ├── test_attention_sink.py │ ├── test_attention_sink_blackwell.py │ ├── test_batch_attention.py │ ├── test_batch_decode_kernels.py │ ├── test_batch_invariant_fa2.py │ ├── test_batch_prefill.py │ ├── test_batch_prefill_kernels.py │ ├── test_blackwell_fmha.py │ ├── test_cudnn_decode.py │ ├── test_cudnn_prefill.py │ ├── test_cudnn_prefill_deepseek.py │ ├── test_decode_fp8_calibration_scale.py │ ├── test_decode_prefill_lse.py │ ├── test_deepseek_mla.py │ ├── test_fmha_v2_prefill_deepseek.py │ ├── test_fp8_prefill.py │ ├── test_hopper.py │ ├── test_hopper_fp8_attention.py │ ├── test_logits_cap.py │ ├── test_mla_decode_kernel.py │ ├── test_mla_page.py │ ├── test_non_contiguous_decode.py │ ├── test_non_contiguous_prefill.py │ ├── test_page.py │ ├── test_rope.py │ ├── test_shared_prefix_kernels.py │ ├── test_single_prefill.py │ ├── test_sliding_window.py │ ├── test_tensor_cores_decode.py │ ├── test_trtllm_gen_attention.py │ ├── test_trtllm_gen_mla.py │ ├── test_trtllm_ragged_kv_stride.py │ ├── test_xqa.py │ ├── test_xqa_batch_decode.py │ └── test_xqa_mla_batch_decode.py ├── cli │ └── test_cli_show_config.py ├── comm │ ├── __init__.py │ ├── test_mnnvl_custom_comm.py │ ├── test_mnnvl_memory.py │ ├── test_nvshmem.py │ ├── test_nvshmem_allreduce.py │ ├── test_trtllm_allreduce.py │ ├── test_trtllm_allreduce_fusion.py │ ├── test_trtllm_alltoall.py │ ├── test_trtllm_mnnvl_allreduce.py │ ├── test_trtllm_mnnvl_allreduce_custom_comm.py │ ├── test_trtllm_moe_allreduce_fusion.py │ ├── test_trtllm_moe_allreduce_fusion_finalize.py │ └── test_vllm_custom_allreduce.py ├── conftest.py ├── gemm │ ├── __init__.py │ ├── test_bmm_fp8.py │ ├── test_cute_dsl_blockscaled_gemm.py │ ├── test_cute_dsl_gemm_allreduce_two_shot.py │ ├── test_group_gemm.py │ ├── test_groupwise_scaled_gemm_fp8.py │ ├── test_groupwise_scaled_gemm_mxfp4.py │ ├── test_mm_fp4.py │ ├── test_mm_fp8.py │ ├── test_sm_constraint_gemm.py │ └── test_tgv_gemm.py ├── model_optimizations │ ├── test_dsv3_fused_routing.py │ └── test_dsv3_router_gemm.py ├── moe │ ├── __init__.py │ ├── test_dpsk_fused_moe_fp8.py │ ├── test_trtllm_cutlass_fused_moe.py │ ├── test_trtllm_gen_fused_moe.py │ ├── test_trtllm_gen_routed_fused_moe.py │ └── utils.py ├── test_artifacts.py ├── test_helpers │ ├── __init__.py │ ├── alibi_reference.py │ ├── jit_utils.py │ ├── params.py │ ├── rope_reference.py │ ├── sink_attention_reference.py │ ├── test_helpers.py │ └── utils_fp4.py ├── utils │ ├── __init__.py │ ├── test_activation.py │ ├── test_block_sparse.py │ ├── test_block_sparse_indices_to_vector_sparse_offsets.py │ ├── test_create_ipc_buffer.py │ ├── test_decorators.py │ ├── test_fp4_quantize.py │ ├── test_fp4_tensor_torch_cute.py │ ├── test_fp8_quantize.py │ ├── test_green_ctx.py │ ├── test_jit_example.py │ ├── test_jit_warmup.py │ ├── test_load_cubin_compile_race_condition.py │ ├── test_logging.py │ ├── test_logits_processor.py │ ├── test_norm.py │ ├── test_pod_kernels.py │ ├── test_quantization.py │ ├── test_sampling.py │ └── test_triton_cascade.py └── utils_fp8.py └── version.txt /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.clang-format -------------------------------------------------------------------------------- /.devcontainer/cu126/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.devcontainer/cu126/devcontainer.json -------------------------------------------------------------------------------- /.devcontainer/cu128/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.devcontainer/cu128/devcontainer.json -------------------------------------------------------------------------------- /.devcontainer/cu129/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.devcontainer/cu129/devcontainer.json -------------------------------------------------------------------------------- /.devcontainer/cu130/devcontainer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.devcontainer/cu130/devcontainer.json -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/workflows/build-doc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/build-doc.yml -------------------------------------------------------------------------------- /.github/workflows/new-issue.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/new-issue.yml -------------------------------------------------------------------------------- /.github/workflows/nightly-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/nightly-release.yml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.github/workflows/release-ci-docker.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/release-ci-docker.yml -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/release.yml -------------------------------------------------------------------------------- /.github/workflows/update-codeowners.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.github/workflows/update-codeowners.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/Jenkinsfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/NOTICE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/bench_append_paged_kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_append_paged_kv_cache.py -------------------------------------------------------------------------------- /benchmarks/bench_append_paged_mla_kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_append_paged_mla_kv_cache.py -------------------------------------------------------------------------------- /benchmarks/bench_attention_sink_triton_sgl_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_attention_sink_triton_sgl_context.py -------------------------------------------------------------------------------- /benchmarks/bench_attention_sink_triton_sgl_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_attention_sink_triton_sgl_decode.py -------------------------------------------------------------------------------- /benchmarks/bench_batch_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_batch_attention.py -------------------------------------------------------------------------------- /benchmarks/bench_batch_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_batch_decode.py -------------------------------------------------------------------------------- /benchmarks/bench_blackwell_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_blackwell_attention.py -------------------------------------------------------------------------------- /benchmarks/bench_block_sparse_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_block_sparse_attention.py -------------------------------------------------------------------------------- /benchmarks/bench_cute_dsl_blockscaled_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_cute_dsl_blockscaled_gemm.py -------------------------------------------------------------------------------- /benchmarks/bench_cutlass_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_cutlass_fused_moe.py -------------------------------------------------------------------------------- /benchmarks/bench_deepgemm_blackwell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_deepgemm_blackwell.py -------------------------------------------------------------------------------- /benchmarks/bench_deepseek_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_deepseek_mla.py -------------------------------------------------------------------------------- /benchmarks/bench_fused_add_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_fused_add_rmsnorm.py -------------------------------------------------------------------------------- /benchmarks/bench_groupwise_gemm_fp8_blackwell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_groupwise_gemm_fp8_blackwell.py -------------------------------------------------------------------------------- /benchmarks/bench_groupwise_grouped_gemm_fp8_blackwell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_groupwise_grouped_gemm_fp8_blackwell.py -------------------------------------------------------------------------------- /benchmarks/bench_groupwise_grouped_gemm_mxfp4_blackwell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_groupwise_grouped_gemm_mxfp4_blackwell.py -------------------------------------------------------------------------------- /benchmarks/bench_hopper_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_hopper_attention.py -------------------------------------------------------------------------------- /benchmarks/bench_hopper_fp8_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_hopper_fp8_attention.py -------------------------------------------------------------------------------- /benchmarks/bench_hopper_grouped_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_hopper_grouped_gemm.py -------------------------------------------------------------------------------- /benchmarks/bench_logging_overhead.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_logging_overhead.py -------------------------------------------------------------------------------- /benchmarks/bench_mixed_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_mixed_attention.py -------------------------------------------------------------------------------- /benchmarks/bench_mm_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_mm_fp8.py -------------------------------------------------------------------------------- /benchmarks/bench_persistent_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_persistent_gemm.py -------------------------------------------------------------------------------- /benchmarks/bench_renorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_renorm.py -------------------------------------------------------------------------------- /benchmarks/bench_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_rope.py -------------------------------------------------------------------------------- /benchmarks/bench_rope_quantize_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_rope_quantize_fp8.py -------------------------------------------------------------------------------- /benchmarks/bench_rope_quantize_fp8_append_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_rope_quantize_fp8_append_cache.py -------------------------------------------------------------------------------- /benchmarks/bench_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_sampling.py -------------------------------------------------------------------------------- /benchmarks/bench_sliding_window.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_sliding_window.py -------------------------------------------------------------------------------- /benchmarks/bench_softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_softmax.py -------------------------------------------------------------------------------- /benchmarks/bench_tgv_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_tgv_gemm.py -------------------------------------------------------------------------------- /benchmarks/bench_trtllm_fmha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_trtllm_fmha.py -------------------------------------------------------------------------------- /benchmarks/bench_trtllm_gen_fused_moe_autotuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_trtllm_gen_fused_moe_autotuner.py -------------------------------------------------------------------------------- /benchmarks/bench_trtllm_gen_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/bench_trtllm_gen_mla.py -------------------------------------------------------------------------------- /benchmarks/flashinfer_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/flashinfer_benchmark.py -------------------------------------------------------------------------------- /benchmarks/routines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/routines/__init__.py -------------------------------------------------------------------------------- /benchmarks/routines/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/routines/attention.py -------------------------------------------------------------------------------- /benchmarks/routines/flashinfer_benchmark_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/routines/flashinfer_benchmark_utils.py -------------------------------------------------------------------------------- /benchmarks/routines/gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/routines/gemm.py -------------------------------------------------------------------------------- /benchmarks/routines/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/routines/moe.py -------------------------------------------------------------------------------- /benchmarks/samples/sample_testlist.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/samples/sample_testlist.txt -------------------------------------------------------------------------------- /benchmarks/samples/sample_testlist_output.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/samples/sample_testlist_output.csv -------------------------------------------------------------------------------- /benchmarks/samples/sample_testlist_output.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/samples/sample_testlist_output.txt -------------------------------------------------------------------------------- /benchmarks/test_flashinfer_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/benchmarks/test_flashinfer_benchmark.py -------------------------------------------------------------------------------- /ci/bash.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/ci/bash.sh -------------------------------------------------------------------------------- /ci/docker-tags.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/ci/docker-tags.yml -------------------------------------------------------------------------------- /ci/scripts/jenkins/git_skip_ci.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/ci/scripts/jenkins/git_skip_ci.py -------------------------------------------------------------------------------- /ci/scripts/jenkins/git_skip_ci_globs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/ci/scripts/jenkins/git_skip_ci_globs.py -------------------------------------------------------------------------------- /ci/scripts/jenkins/git_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/ci/scripts/jenkins/git_utils.py -------------------------------------------------------------------------------- /ci/scripts/jenkins/retry.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/ci/scripts/jenkins/retry.sh -------------------------------------------------------------------------------- /csrc/batch_attention.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_attention.cu -------------------------------------------------------------------------------- /csrc/batch_attention_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_attention_customize_config.jinja -------------------------------------------------------------------------------- /csrc/batch_attention_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_attention_jit_binding.cu -------------------------------------------------------------------------------- /csrc/batch_attention_paged_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_attention_paged_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_decode.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode.cu -------------------------------------------------------------------------------- /csrc/batch_decode_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_customize_config.jinja -------------------------------------------------------------------------------- /csrc/batch_decode_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_jit_binding.cu -------------------------------------------------------------------------------- /csrc/batch_decode_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_decode_mla_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_mla_binding.cu -------------------------------------------------------------------------------- /csrc/batch_decode_mla_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_mla_config.jinja -------------------------------------------------------------------------------- /csrc/batch_decode_mla_cute_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_mla_cute_sm80.cu -------------------------------------------------------------------------------- /csrc/batch_decode_mla_plan.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_mla_plan.cu -------------------------------------------------------------------------------- /csrc/batch_decode_mla_run.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_decode_mla_run.cu -------------------------------------------------------------------------------- /csrc/batch_mla_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_binding.cu -------------------------------------------------------------------------------- /csrc/batch_mla_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_config.jinja -------------------------------------------------------------------------------- /csrc/batch_mla_plan.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_plan.cu -------------------------------------------------------------------------------- /csrc/batch_mla_run.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_run.cu -------------------------------------------------------------------------------- /csrc/batch_mla_sm90_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_sm90_binding.cu -------------------------------------------------------------------------------- /csrc/batch_mla_sm90_plan.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_sm90_plan.cu -------------------------------------------------------------------------------- /csrc/batch_mla_sm90_run.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_mla_sm90_run.cu -------------------------------------------------------------------------------- /csrc/batch_pod.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_pod.cu -------------------------------------------------------------------------------- /csrc/batch_pod_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_pod_customize_config.jinja -------------------------------------------------------------------------------- /csrc/batch_pod_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_pod_jit_binding.cu -------------------------------------------------------------------------------- /csrc/batch_pod_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_pod_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill.cu -------------------------------------------------------------------------------- /csrc/batch_prefill_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_customize_config.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_fp8_paged_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_fp8_paged_sm90_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_fp8_ragged_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- 1 | // TODO: Not implemented yet 2 | -------------------------------------------------------------------------------- /csrc/batch_prefill_fp8_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_fp8_sm90.cu -------------------------------------------------------------------------------- /csrc/batch_prefill_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_jit_binding.cu -------------------------------------------------------------------------------- /csrc/batch_prefill_paged_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_paged_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_paged_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_paged_sm90_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_ragged_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_ragged_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_ragged_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_ragged_sm90_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_sm90.cu -------------------------------------------------------------------------------- /csrc/batch_prefill_sm90_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_sm90_customize_config.jinja -------------------------------------------------------------------------------- /csrc/batch_prefill_sm90_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/batch_prefill_sm90_jit_binding.cu -------------------------------------------------------------------------------- /csrc/blackwell_fmha_plan.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/blackwell_fmha_plan.cu -------------------------------------------------------------------------------- /csrc/bmm_fp8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/bmm_fp8.cu -------------------------------------------------------------------------------- /csrc/cascade.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/cascade.cu -------------------------------------------------------------------------------- /csrc/cudnn_sdpa_kernel_launcher.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/cudnn_sdpa_kernel_launcher.cu -------------------------------------------------------------------------------- /csrc/cudnn_sdpa_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/cudnn_sdpa_utils.h -------------------------------------------------------------------------------- /csrc/cutlass_mla.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/cutlass_mla.cu -------------------------------------------------------------------------------- /csrc/dsv3_router_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/dsv3_router_gemm.cu -------------------------------------------------------------------------------- /csrc/flashinfer_cascade_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_cascade_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_gemm_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_gemm_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_gemm_sm90_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_gemm_sm90_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_mla_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_mla_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_norm_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_norm_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_page_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_page_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_quantization_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_quantization_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_rope_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_rope_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_sampling_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_sampling_binding.cu -------------------------------------------------------------------------------- /csrc/flashinfer_xqa_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/flashinfer_xqa_binding.cu -------------------------------------------------------------------------------- /csrc/fmhaReduction.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmhaReduction.cu -------------------------------------------------------------------------------- /csrc/fmha_cutlass_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_cutlass_sm100.cu -------------------------------------------------------------------------------- /csrc/fmha_cutlass_sm100_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_cutlass_sm100_binding.cu -------------------------------------------------------------------------------- /csrc/fmha_v2/convert.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/convert.cu -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/alibi_params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/alibi_params.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/fragment.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/fragment.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/gemm.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/gmem_tile_o.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/gmem_tile_o.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/gmem_tile_o_packed.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/gmem_tile_o_packed.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/gmem_tile_ps.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/gmem_tile_ps.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/gmem_tile_qkv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/gmem_tile_qkv.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/gmem_tile_qkv_packed.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/gmem_tile_qkv_packed.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/arrive_wait.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/arrive_wait.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/compute_tile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/compute_tile.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/fragment.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/fragment.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/gmem_tile_o_packed.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/gmem_tile_o_packed.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/gmem_tile_qkv_packed.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/gmem_tile_qkv_packed.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/gmma_descriptor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/gmma_descriptor.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/kernel_traits.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/smem_tile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/smem_tile.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/smem_tile_o.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/smem_tile_o.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/tma_descriptor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/tma_descriptor.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/tma_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/tma_types.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_gmma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_gmma.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_hgmma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_hgmma.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_hgmma_bf16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_hgmma_bf16.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_igmma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_igmma.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_qgmma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_qgmma.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_tma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_tma.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/hopper/utils_warpgroup.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/hopper/utils_warpgroup.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/kernel_traits.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/mask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/mask.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/numeric_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/numeric_types.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/paged_kv_cache.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/paged_kv_cache.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/smem_tile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/smem_tile.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/smem_tile_o.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/smem_tile_o.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/smem_tile_qkv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/smem_tile_qkv.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/smem_tile_v.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/smem_tile_v.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/softmax.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/traits.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/utils.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/warpspec/circular_buffer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/warpspec/circular_buffer.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/warpspec/compute.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/warpspec/compute.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/warpspec/dma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/warpspec/dma.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/warpspec/epilogue.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/warpspec/epilogue.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fmha/warpspec/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fmha/warpspec/kernel_traits.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention.cpp -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_demo_bert_params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_demo_bert_params.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_1xN.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_1xN.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_1xN_multi_cta.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_1xN_multi_cta.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_1xN_noloop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_1xN_noloop.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_2x2.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_2x2.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_4x1_hopper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_4x1_hopper.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_4x1_hopper_noloop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_4x1_hopper_noloop.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_4xN_hopper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_4xN_hopper.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_kernel_4xN_hopper_noloop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_kernel_4xN_hopper_noloop.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_attention_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_attention_utils.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_cross_attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_cross_attention.cpp -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_cross_attention.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_cross_attention.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_cross_attention_kernel_1xN.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_cross_attention_kernel_1xN.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_cross_attention_kernel_1xN_noloop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_cross_attention_kernel_1xN_noloop.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_flash_attention_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_flash_attention_kernel.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_flash_attention_kernel_noloop.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_flash_attention_kernel_noloop.h -------------------------------------------------------------------------------- /csrc/fmha_v2/fused_multihead_flash_attention_kernel_noloop_tiled.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/fused_multihead_flash_attention_kernel_noloop_tiled.h -------------------------------------------------------------------------------- /csrc/fmha_v2/softmax_bf16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/softmax_bf16.cu -------------------------------------------------------------------------------- /csrc/fmha_v2/softmax_fp16.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/softmax_fp16.cu -------------------------------------------------------------------------------- /csrc/fmha_v2/softmax_fp32.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/softmax_fp32.cu -------------------------------------------------------------------------------- /csrc/fmha_v2/softmax_fp8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/softmax_fp8.cu -------------------------------------------------------------------------------- /csrc/fmha_v2/softmax_impl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/softmax_impl.h -------------------------------------------------------------------------------- /csrc/fmha_v2/softmax_int8.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fmha_v2/softmax_int8.cu -------------------------------------------------------------------------------- /csrc/fp4_gemm_cutlass.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fp4_gemm_cutlass.cu -------------------------------------------------------------------------------- /csrc/fp4_gemm_cutlass.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fp4_gemm_cutlass.jinja -------------------------------------------------------------------------------- /csrc/fp4_gemm_cutlass_sm120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fp4_gemm_cutlass_sm120.cu -------------------------------------------------------------------------------- /csrc/fp4_gemm_cutlass_sm120.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fp4_gemm_cutlass_sm120.jinja -------------------------------------------------------------------------------- /csrc/fp8_gemm_cutlass.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fp8_gemm_cutlass.cu -------------------------------------------------------------------------------- /csrc/fp8_gemm_cutlass.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fp8_gemm_cutlass.jinja -------------------------------------------------------------------------------- /csrc/fused_moe/cutlass_backend/cutlass_fused_moe_instantiation.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fused_moe/cutlass_backend/cutlass_fused_moe_instantiation.cu -------------------------------------------------------------------------------- /csrc/fused_moe/cutlass_backend/cutlass_fused_moe_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fused_moe/cutlass_backend/cutlass_fused_moe_kernels.cuh -------------------------------------------------------------------------------- /csrc/fused_moe/cutlass_backend/deepgemm_jit_setup.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fused_moe/cutlass_backend/deepgemm_jit_setup.cu -------------------------------------------------------------------------------- /csrc/fused_moe/cutlass_backend/flashinfer_cutlass_fused_moe_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fused_moe/cutlass_backend/flashinfer_cutlass_fused_moe_binding.cu -------------------------------------------------------------------------------- /csrc/fused_moe/moeTopKFuncs.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fused_moe/moeTopKFuncs.cuh -------------------------------------------------------------------------------- /csrc/fused_moe/noAuxTcKernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/fused_moe/noAuxTcKernels.cu -------------------------------------------------------------------------------- /csrc/gemm_groupwise_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/gemm_groupwise_sm100.cu -------------------------------------------------------------------------------- /csrc/gemm_groupwise_sm100_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/gemm_groupwise_sm100_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/gemm_groupwise_sm120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/gemm_groupwise_sm120.cu -------------------------------------------------------------------------------- /csrc/gemm_groupwise_sm120_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/gemm_groupwise_sm120_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/gemm_sm100_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/gemm_sm100_binding.cu -------------------------------------------------------------------------------- /csrc/gemm_sm120_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/gemm_sm120_binding.cu -------------------------------------------------------------------------------- /csrc/group_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm.cu -------------------------------------------------------------------------------- /csrc/group_gemm_fp8_groupwise_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_fp8_groupwise_sm100.cu -------------------------------------------------------------------------------- /csrc/group_gemm_fp8_groupwise_sm100_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_fp8_groupwise_sm100_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/group_gemm_fp8_groupwise_sm120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_fp8_groupwise_sm120.cu -------------------------------------------------------------------------------- /csrc/group_gemm_fp8_groupwise_sm120_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_fp8_groupwise_sm120_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/group_gemm_mxfp4_groupwise_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_mxfp4_groupwise_sm100.cu -------------------------------------------------------------------------------- /csrc/group_gemm_mxfp4_groupwise_sm100_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_mxfp4_groupwise_sm100_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/group_gemm_sm100_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_sm100_binding.cu -------------------------------------------------------------------------------- /csrc/group_gemm_sm120_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_sm120_binding.cu -------------------------------------------------------------------------------- /csrc/group_gemm_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_sm90.cu -------------------------------------------------------------------------------- /csrc/group_gemm_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/group_gemm_sm90_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/logging.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/logging.cc -------------------------------------------------------------------------------- /csrc/norm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/norm.cu -------------------------------------------------------------------------------- /csrc/nv_internal/cpp/common/envUtils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/cpp/common/envUtils.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/cpp/common/logger.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/cpp/common/logger.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/cpp/common/memoryUtils.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/cpp/common/memoryUtils.cu -------------------------------------------------------------------------------- /csrc/nv_internal/cpp/common/stringUtils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/cpp/common/stringUtils.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/cpp/common/tllmException.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/cpp/common/tllmException.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/cpp/kernels/quantization.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/cpp/kernels/quantization.cu -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/NvInferRuntime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/NvInferRuntime.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/assert.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/assert.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/cudaBf16Wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/cudaBf16Wrapper.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/cudaFp8Utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/cudaFp8Utils.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/cudaUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/cudaUtils.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/dataType.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/dataType.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/logger.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/logger.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/quantization.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/quantization.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/stringUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/stringUtils.h -------------------------------------------------------------------------------- /csrc/nv_internal/include/tensorrt_llm/common/tllmException.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/include/tensorrt_llm/common/tllmException.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/cublasMMWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/cublasMMWrapper.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/cudaBf16Fallbacks.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/cudaBf16Fallbacks.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/cudaDriverWrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/cudaDriverWrapper.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/cudaTypeUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/cudaTypeUtils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/envUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/envUtils.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/memoryUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/memoryUtils.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/quantTypeUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/quantTypeUtils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/reduceKernelUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/reduceKernelUtils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/common/workspace.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/common/workspace.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/fp8_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/fp8_gemm.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/fp8_gemm_impl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/fp8_gemm_impl.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/jit_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/jit_utils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/mma_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/mma_utils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/nvrtc_cutlass.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/nvrtc_cutlass.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/nvrtc_std.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/nvrtc_std.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/scheduler.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/scheduler.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/tma_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/tma_utils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/deep_gemm/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/deep_gemm/utils.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_heuristic.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_heuristic.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_heuristic.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_heuristic.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_type_conversion.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/common.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_gemm_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_gemm_kernels.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_kernels.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_util_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_util_kernels.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/delayStream.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/delayStream.cu -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/delayStream.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/delayStream.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/lora/lora.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/lora/lora.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/lora/lora.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/lora/lora.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/preQuantScaleKernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/preQuantScaleKernel.cu -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/preQuantScaleKernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/preQuantScaleKernel.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/quantization.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/quantization.cuh -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/kernels/quantization.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/kernels/quantization.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/thop/fp4Op.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/thop/fp4Op.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/thop/fp4Quantize.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/thop/fp4Quantize.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/thop/fp4Quantize.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/thop/fp4Quantize.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/thop/fp8Quantize.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/thop/fp8Quantize.cpp -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/thop/fp8Quantize.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/thop/fp8Quantize.h -------------------------------------------------------------------------------- /csrc/nv_internal/tensorrt_llm/thop/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nv_internal/tensorrt_llm/thop/utils.h -------------------------------------------------------------------------------- /csrc/nvshmem_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/nvshmem_binding.cu -------------------------------------------------------------------------------- /csrc/page.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/page.cu -------------------------------------------------------------------------------- /csrc/pod.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/pod.cu -------------------------------------------------------------------------------- /csrc/pod_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/pod_customize_config.jinja -------------------------------------------------------------------------------- /csrc/pod_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/pod_jit_binding.cu -------------------------------------------------------------------------------- /csrc/pod_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/pod_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/quantization.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/quantization.cu -------------------------------------------------------------------------------- /csrc/renorm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/renorm.cu -------------------------------------------------------------------------------- /csrc/rope.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/rope.cu -------------------------------------------------------------------------------- /csrc/runtime_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/runtime_utils.h -------------------------------------------------------------------------------- /csrc/sampling.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/sampling.cu -------------------------------------------------------------------------------- /csrc/sampling_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/sampling_utils.h -------------------------------------------------------------------------------- /csrc/single_decode.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_decode.cu -------------------------------------------------------------------------------- /csrc/single_decode_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_decode_customize_config.jinja -------------------------------------------------------------------------------- /csrc/single_decode_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_decode_jit_binding.cu -------------------------------------------------------------------------------- /csrc/single_decode_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_decode_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/single_prefill.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill.cu -------------------------------------------------------------------------------- /csrc/single_prefill_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_customize_config.jinja -------------------------------------------------------------------------------- /csrc/single_prefill_fp8_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_fp8_sm90.cu -------------------------------------------------------------------------------- /csrc/single_prefill_fp8_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_fp8_sm90_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/single_prefill_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_jit_binding.cu -------------------------------------------------------------------------------- /csrc/single_prefill_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/single_prefill_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_sm90.cu -------------------------------------------------------------------------------- /csrc/single_prefill_sm90_customize_config.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_sm90_customize_config.jinja -------------------------------------------------------------------------------- /csrc/single_prefill_sm90_jit_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_sm90_jit_binding.cu -------------------------------------------------------------------------------- /csrc/single_prefill_sm90_kernel_inst.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/single_prefill_sm90_kernel_inst.jinja -------------------------------------------------------------------------------- /csrc/tgv_gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/tgv_gemm.cu -------------------------------------------------------------------------------- /csrc/tgv_gemm.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/tgv_gemm.jinja -------------------------------------------------------------------------------- /csrc/trtllm_allreduce.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_allreduce.cu -------------------------------------------------------------------------------- /csrc/trtllm_allreduce_fusion.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_allreduce_fusion.cu -------------------------------------------------------------------------------- /csrc/trtllm_alltoall.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_alltoall.cu -------------------------------------------------------------------------------- /csrc/trtllm_alltoall_prepare.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_alltoall_prepare.cu -------------------------------------------------------------------------------- /csrc/trtllm_batched_gemm_runner.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_batched_gemm_runner.cu -------------------------------------------------------------------------------- /csrc/trtllm_fmha_kernel_launcher.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fmha_kernel_launcher.cu -------------------------------------------------------------------------------- /csrc/trtllm_fmha_v2_binding.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fmha_v2_binding.cu -------------------------------------------------------------------------------- /csrc/trtllm_fused_moe_dev_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fused_moe_dev_kernel.cu -------------------------------------------------------------------------------- /csrc/trtllm_fused_moe_kernel_launcher.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fused_moe_kernel_launcher.cu -------------------------------------------------------------------------------- /csrc/trtllm_fused_moe_routing_deepseek.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fused_moe_routing_deepseek.cu -------------------------------------------------------------------------------- /csrc/trtllm_fused_moe_routing_llama4.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fused_moe_routing_llama4.cu -------------------------------------------------------------------------------- /csrc/trtllm_fused_moe_routing_renormalize.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fused_moe_routing_renormalize.cu -------------------------------------------------------------------------------- /csrc/trtllm_fused_moe_runner.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_fused_moe_runner.cu -------------------------------------------------------------------------------- /csrc/trtllm_gemm_runner.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_gemm_runner.cu -------------------------------------------------------------------------------- /csrc/trtllm_low_latency_gemm_runner.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_low_latency_gemm_runner.cu -------------------------------------------------------------------------------- /csrc/trtllm_mnnvl_allreduce.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_mnnvl_allreduce.cu -------------------------------------------------------------------------------- /csrc/trtllm_moe_allreduce_fusion.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/trtllm_moe_allreduce_fusion.cu -------------------------------------------------------------------------------- /csrc/tvm_ffi_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/tvm_ffi_utils.h -------------------------------------------------------------------------------- /csrc/vllm_custom_all_reduce.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/vllm_custom_all_reduce.cu -------------------------------------------------------------------------------- /csrc/xqa/barriers.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/barriers.cuh -------------------------------------------------------------------------------- /csrc/xqa/cuda_hint.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/cuda_hint.cuh -------------------------------------------------------------------------------- /csrc/xqa/defines.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/defines.h -------------------------------------------------------------------------------- /csrc/xqa/gmma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/gmma.cuh -------------------------------------------------------------------------------- /csrc/xqa/gmma_impl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/gmma_impl.cuh -------------------------------------------------------------------------------- /csrc/xqa/hostUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/hostUtils.h -------------------------------------------------------------------------------- /csrc/xqa/ldgsts.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/ldgsts.cuh -------------------------------------------------------------------------------- /csrc/xqa/mha.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mha.cu -------------------------------------------------------------------------------- /csrc/xqa/mha.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mha.h -------------------------------------------------------------------------------- /csrc/xqa/mhaUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mhaUtils.cuh -------------------------------------------------------------------------------- /csrc/xqa/mha_components.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mha_components.cuh -------------------------------------------------------------------------------- /csrc/xqa/mha_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mha_sm90.cu -------------------------------------------------------------------------------- /csrc/xqa/mha_stdheaders.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mha_stdheaders.cuh -------------------------------------------------------------------------------- /csrc/xqa/mla_sm120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mla_sm120.cu -------------------------------------------------------------------------------- /csrc/xqa/mla_sm120.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mla_sm120.cuh -------------------------------------------------------------------------------- /csrc/xqa/mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/mma.cuh -------------------------------------------------------------------------------- /csrc/xqa/platform.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/platform.h -------------------------------------------------------------------------------- /csrc/xqa/specDec.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/specDec.h -------------------------------------------------------------------------------- /csrc/xqa/tensorMap.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/tensorMap.cpp -------------------------------------------------------------------------------- /csrc/xqa/tensorMap.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/tensorMap.h -------------------------------------------------------------------------------- /csrc/xqa/tma.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/tma.h -------------------------------------------------------------------------------- /csrc/xqa/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/utils.cuh -------------------------------------------------------------------------------- /csrc/xqa/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/utils.h -------------------------------------------------------------------------------- /csrc/xqa/xqa_wrapper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/csrc/xqa/xqa_wrapper.cu -------------------------------------------------------------------------------- /docker/Dockerfile.cu126: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu126 -------------------------------------------------------------------------------- /docker/Dockerfile.cu126.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu126.dev -------------------------------------------------------------------------------- /docker/Dockerfile.cu128: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu128 -------------------------------------------------------------------------------- /docker/Dockerfile.cu128.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu128.dev -------------------------------------------------------------------------------- /docker/Dockerfile.cu129: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu129 -------------------------------------------------------------------------------- /docker/Dockerfile.cu129.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu129.dev -------------------------------------------------------------------------------- /docker/Dockerfile.cu130: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu130 -------------------------------------------------------------------------------- /docker/Dockerfile.cu130.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/Dockerfile.cu130.dev -------------------------------------------------------------------------------- /docker/bash.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/bash.sh -------------------------------------------------------------------------------- /docker/install/install_python.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/install/install_python.sh -------------------------------------------------------------------------------- /docker/install/install_python_packages.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docker/install/install_python_packages.sh -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/_static/FlashInfer-black-background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/_static/FlashInfer-black-background.png -------------------------------------------------------------------------------- /docs/_static/FlashInfer-white-background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/_static/FlashInfer-white-background.png -------------------------------------------------------------------------------- /docs/api/activation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/activation.rst -------------------------------------------------------------------------------- /docs/api/attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/attention.rst -------------------------------------------------------------------------------- /docs/api/cascade.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/cascade.rst -------------------------------------------------------------------------------- /docs/api/comm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/comm.rst -------------------------------------------------------------------------------- /docs/api/fp4_quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/fp4_quantization.rst -------------------------------------------------------------------------------- /docs/api/fused_moe.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/fused_moe.rst -------------------------------------------------------------------------------- /docs/api/gemm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/gemm.rst -------------------------------------------------------------------------------- /docs/api/green_ctx.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/green_ctx.rst -------------------------------------------------------------------------------- /docs/api/logits_processor.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/logits_processor.rst -------------------------------------------------------------------------------- /docs/api/norm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/norm.rst -------------------------------------------------------------------------------- /docs/api/page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/page.rst -------------------------------------------------------------------------------- /docs/api/quantization.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/quantization.rst -------------------------------------------------------------------------------- /docs/api/rope.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/rope.rst -------------------------------------------------------------------------------- /docs/api/sampling.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/sampling.rst -------------------------------------------------------------------------------- /docs/api/sparse.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/sparse.rst -------------------------------------------------------------------------------- /docs/api/testing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/api/testing.rst -------------------------------------------------------------------------------- /docs/build_docs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/build_docs.sh -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/installation.rst -------------------------------------------------------------------------------- /docs/logging.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/logging.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/tutorials/kv_layout.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/tutorials/kv_layout.rst -------------------------------------------------------------------------------- /docs/tutorials/recursive_attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/tutorials/recursive_attention.rst -------------------------------------------------------------------------------- /docs/wrap_run_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/docs/wrap_run_llm.py -------------------------------------------------------------------------------- /flashinfer-cubin/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-cubin/.gitignore -------------------------------------------------------------------------------- /flashinfer-cubin/build_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-cubin/build_backend.py -------------------------------------------------------------------------------- /flashinfer-cubin/flashinfer_cubin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-cubin/flashinfer_cubin/__init__.py -------------------------------------------------------------------------------- /flashinfer-cubin/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-cubin/pyproject.toml -------------------------------------------------------------------------------- /flashinfer-jit-cache/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-jit-cache/.gitignore -------------------------------------------------------------------------------- /flashinfer-jit-cache/build_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-jit-cache/build_backend.py -------------------------------------------------------------------------------- /flashinfer-jit-cache/flashinfer_jit_cache/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-jit-cache/flashinfer_jit_cache/__init__.py -------------------------------------------------------------------------------- /flashinfer-jit-cache/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer-jit-cache/pyproject.toml -------------------------------------------------------------------------------- /flashinfer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/__init__.py -------------------------------------------------------------------------------- /flashinfer/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/__main__.py -------------------------------------------------------------------------------- /flashinfer/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/activation.py -------------------------------------------------------------------------------- /flashinfer/aot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/aot.py -------------------------------------------------------------------------------- /flashinfer/api_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/api_logging.py -------------------------------------------------------------------------------- /flashinfer/artifacts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/artifacts.py -------------------------------------------------------------------------------- /flashinfer/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/attention.py -------------------------------------------------------------------------------- /flashinfer/autotuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/autotuner.py -------------------------------------------------------------------------------- /flashinfer/cascade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cascade.py -------------------------------------------------------------------------------- /flashinfer/comm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/__init__.py -------------------------------------------------------------------------------- /flashinfer/comm/cuda_ipc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/cuda_ipc.py -------------------------------------------------------------------------------- /flashinfer/comm/dlpack_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/dlpack_utils.py -------------------------------------------------------------------------------- /flashinfer/comm/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/mapping.py -------------------------------------------------------------------------------- /flashinfer/comm/mnnvl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/mnnvl.py -------------------------------------------------------------------------------- /flashinfer/comm/nvshmem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/nvshmem.py -------------------------------------------------------------------------------- /flashinfer/comm/nvshmem_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/nvshmem_allreduce.py -------------------------------------------------------------------------------- /flashinfer/comm/trtllm_alltoall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/trtllm_alltoall.py -------------------------------------------------------------------------------- /flashinfer/comm/trtllm_ar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/trtllm_ar.py -------------------------------------------------------------------------------- /flashinfer/comm/trtllm_mnnvl_ar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/trtllm_mnnvl_ar.py -------------------------------------------------------------------------------- /flashinfer/comm/vllm_ar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/comm/vllm_ar.py -------------------------------------------------------------------------------- /flashinfer/compilation_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/compilation_context.py -------------------------------------------------------------------------------- /flashinfer/cuda_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cuda_utils.py -------------------------------------------------------------------------------- /flashinfer/cudnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cudnn/__init__.py -------------------------------------------------------------------------------- /flashinfer/cudnn/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cudnn/decode.py -------------------------------------------------------------------------------- /flashinfer/cudnn/prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cudnn/prefill.py -------------------------------------------------------------------------------- /flashinfer/cudnn/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cudnn/utils.py -------------------------------------------------------------------------------- /flashinfer/cute_dsl/blockscaled_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cute_dsl/blockscaled_gemm.py -------------------------------------------------------------------------------- /flashinfer/cute_dsl/gemm_allreduce_two_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cute_dsl/gemm_allreduce_two_shot.py -------------------------------------------------------------------------------- /flashinfer/cute_dsl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/cute_dsl/utils.py -------------------------------------------------------------------------------- /flashinfer/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/decode.py -------------------------------------------------------------------------------- /flashinfer/deep_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/deep_gemm.py -------------------------------------------------------------------------------- /flashinfer/dsv3_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/dsv3_ops/__init__.py -------------------------------------------------------------------------------- /flashinfer/fp4_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/fp4_quantization.py -------------------------------------------------------------------------------- /flashinfer/fp8_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/fp8_quantization.py -------------------------------------------------------------------------------- /flashinfer/fused_moe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/fused_moe/__init__.py -------------------------------------------------------------------------------- /flashinfer/fused_moe/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/fused_moe/core.py -------------------------------------------------------------------------------- /flashinfer/fused_moe/fused_routing_dsv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/fused_moe/fused_routing_dsv3.py -------------------------------------------------------------------------------- /flashinfer/fused_moe/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/fused_moe/utils.py -------------------------------------------------------------------------------- /flashinfer/gemm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/gemm/__init__.py -------------------------------------------------------------------------------- /flashinfer/gemm/gemm_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/gemm/gemm_base.py -------------------------------------------------------------------------------- /flashinfer/gemm/routergemm_dsv3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/gemm/routergemm_dsv3.py -------------------------------------------------------------------------------- /flashinfer/green_ctx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/green_ctx.py -------------------------------------------------------------------------------- /flashinfer/jit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/__init__.py -------------------------------------------------------------------------------- /flashinfer/jit/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/activation.py -------------------------------------------------------------------------------- /flashinfer/jit/attention/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/attention/__init__.py -------------------------------------------------------------------------------- /flashinfer/jit/attention/fmha_v2/generate_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/attention/fmha_v2/generate_kernels.py -------------------------------------------------------------------------------- /flashinfer/jit/attention/fmha_v2/generator_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/attention/fmha_v2/generator_utils.py -------------------------------------------------------------------------------- /flashinfer/jit/attention/modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/attention/modules.py -------------------------------------------------------------------------------- /flashinfer/jit/attention/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/attention/utils.py -------------------------------------------------------------------------------- /flashinfer/jit/attention/variants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/attention/variants.py -------------------------------------------------------------------------------- /flashinfer/jit/cascade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/cascade.py -------------------------------------------------------------------------------- /flashinfer/jit/comm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/comm.py -------------------------------------------------------------------------------- /flashinfer/jit/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/core.py -------------------------------------------------------------------------------- /flashinfer/jit/cpp_ext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/cpp_ext.py -------------------------------------------------------------------------------- /flashinfer/jit/cubin_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/cubin_loader.py -------------------------------------------------------------------------------- /flashinfer/jit/dsv3_optimizations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/dsv3_optimizations.py -------------------------------------------------------------------------------- /flashinfer/jit/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/env.py -------------------------------------------------------------------------------- /flashinfer/jit/fp4_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/fp4_quantization.py -------------------------------------------------------------------------------- /flashinfer/jit/fp8_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/fp8_quantization.py -------------------------------------------------------------------------------- /flashinfer/jit/fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/fused_moe.py -------------------------------------------------------------------------------- /flashinfer/jit/gemm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/gemm/__init__.py -------------------------------------------------------------------------------- /flashinfer/jit/gemm/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/gemm/core.py -------------------------------------------------------------------------------- /flashinfer/jit/gemm/cutlass/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flashinfer/jit/gemm/cutlass/cutlass_library.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/gemm/cutlass/cutlass_library.py -------------------------------------------------------------------------------- /flashinfer/jit/gemm/cutlass/generate_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/gemm/cutlass/generate_kernels.py -------------------------------------------------------------------------------- /flashinfer/jit/gemm/deepgemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/gemm/deepgemm.py -------------------------------------------------------------------------------- /flashinfer/jit/mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/mla.py -------------------------------------------------------------------------------- /flashinfer/jit/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/norm.py -------------------------------------------------------------------------------- /flashinfer/jit/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/page.py -------------------------------------------------------------------------------- /flashinfer/jit/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/quantization.py -------------------------------------------------------------------------------- /flashinfer/jit/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/rope.py -------------------------------------------------------------------------------- /flashinfer/jit/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/sampling.py -------------------------------------------------------------------------------- /flashinfer/jit/spdlog.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/spdlog.py -------------------------------------------------------------------------------- /flashinfer/jit/tllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/tllm_utils.py -------------------------------------------------------------------------------- /flashinfer/jit/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/utils.py -------------------------------------------------------------------------------- /flashinfer/jit/xqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/jit/xqa.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/__init__.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/compiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/compiler.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/fusion_rules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/fusion_rules.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/legalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/legalization.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/op.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/operators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/operators.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/pipeline.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/processors.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/types.py -------------------------------------------------------------------------------- /flashinfer/logits_processor/validators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/logits_processor/validators.py -------------------------------------------------------------------------------- /flashinfer/mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/mla.py -------------------------------------------------------------------------------- /flashinfer/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/norm.py -------------------------------------------------------------------------------- /flashinfer/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/page.py -------------------------------------------------------------------------------- /flashinfer/pod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/pod.py -------------------------------------------------------------------------------- /flashinfer/prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/prefill.py -------------------------------------------------------------------------------- /flashinfer/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/profiler/__init__.py -------------------------------------------------------------------------------- /flashinfer/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flashinfer/quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/quantization.py -------------------------------------------------------------------------------- /flashinfer/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/rope.py -------------------------------------------------------------------------------- /flashinfer/sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/sampling.py -------------------------------------------------------------------------------- /flashinfer/sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/sparse.py -------------------------------------------------------------------------------- /flashinfer/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/testing/__init__.py -------------------------------------------------------------------------------- /flashinfer/testing/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/testing/utils.py -------------------------------------------------------------------------------- /flashinfer/tllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/tllm_utils.py -------------------------------------------------------------------------------- /flashinfer/triton/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/__init__.py -------------------------------------------------------------------------------- /flashinfer/triton/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/activation.py -------------------------------------------------------------------------------- /flashinfer/triton/cascade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/cascade.py -------------------------------------------------------------------------------- /flashinfer/triton/gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/gemm.py -------------------------------------------------------------------------------- /flashinfer/triton/kernels/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flashinfer/triton/kernels/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/kernels/activation.py -------------------------------------------------------------------------------- /flashinfer/triton/kernels/cascade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/kernels/cascade.py -------------------------------------------------------------------------------- /flashinfer/triton/kernels/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/kernels/norm.py -------------------------------------------------------------------------------- /flashinfer/triton/kernels/quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/kernels/quant.py -------------------------------------------------------------------------------- /flashinfer/triton/kernels/sm_constraint_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/kernels/sm_constraint_gemm.py -------------------------------------------------------------------------------- /flashinfer/triton/norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/norm.py -------------------------------------------------------------------------------- /flashinfer/triton/page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/page.py -------------------------------------------------------------------------------- /flashinfer/triton/sm_constraint_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/sm_constraint_gemm.py -------------------------------------------------------------------------------- /flashinfer/triton/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/triton/utils.py -------------------------------------------------------------------------------- /flashinfer/trtllm_low_latency_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/trtllm_low_latency_gemm.py -------------------------------------------------------------------------------- /flashinfer/tuning_configs/v0_1_trtllm_fused_moe_NVIDIA_B200.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/tuning_configs/v0_1_trtllm_fused_moe_NVIDIA_B200.py -------------------------------------------------------------------------------- /flashinfer/tuning_configs/v0_1_trtllm_fused_moe_NVIDIA_GB200.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/tuning_configs/v0_1_trtllm_fused_moe_NVIDIA_GB200.py -------------------------------------------------------------------------------- /flashinfer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/utils.py -------------------------------------------------------------------------------- /flashinfer/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/version.py -------------------------------------------------------------------------------- /flashinfer/xqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/flashinfer/xqa.py -------------------------------------------------------------------------------- /include/flashinfer/activation.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/activation.cuh -------------------------------------------------------------------------------- /include/flashinfer/allocator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/allocator.h -------------------------------------------------------------------------------- /include/flashinfer/arch_condition.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/arch_condition.h -------------------------------------------------------------------------------- /include/flashinfer/attention/batch_pod.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/batch_pod.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/collective/fmha_common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/collective/fmha_common.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/collective/fmha_fusion.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/collective/fmha_fusion.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/common/pow_2.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/common/pow_2.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/device/fmha.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/device/fmha.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/device/sm100_mla.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/device/sm100_mla.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/fmha_cutlass_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/fmha_cutlass_sm100.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/kernel/fmha_options.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/kernel/fmha_options.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/kernel/fmha_tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/kernel/fmha_tile_scheduler.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/kernel/gather_tensor.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/kernel/gather_tensor.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/kernel/sm100_fmha_mla_reduction.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/kernel/sm100_fmha_mla_reduction.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/kernel/sm100_mla_tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/kernel/sm100_mla_tile_scheduler.hpp -------------------------------------------------------------------------------- /include/flashinfer/attention/blackwell/plan.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/blackwell/plan.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/cascade.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/cascade.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/cutlass_mla.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/cutlass_mla.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/decode.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/decode.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/decode_mla_cute_sm80.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/decode_mla_cute_sm80.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/default_decode_params.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/default_decode_params.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/default_prefill_params.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/default_prefill_params.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/heap.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/heap.h -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/attention_updater.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/attention_updater.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/block_sparse_gather.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/block_sparse_gather.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/default_params.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/default_params.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/epilogue.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/epilogue.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/kernel_traits.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/kernel_traits.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/mainloop.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/mainloop.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/mainloop_mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/mainloop_mma.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/named_barrier.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/named_barrier.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/prefill_sm90.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/prefill_sm90.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/quantization/epilogue.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/quantization/epilogue.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/quantization/kernel_traits.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/quantization/kernel_traits.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/quantization/mainloop_load.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/quantization/mainloop_load.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/quantization/mainloop_mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/quantization/mainloop_mma.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/quantization/mainloop_sparse_load.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/quantization/mainloop_sparse_load.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/quantization/prefill_sm90.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/quantization/prefill_sm90.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/sparse_mainloop.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/sparse_mainloop.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/tile_scheduler.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/tile_scheduler.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/utils.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/variant_helper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/variant_helper.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/hopper/variants.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/hopper/variants.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/mask.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/mask.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/mla.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/mla.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/mla_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/mla_hopper.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/mla_params.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/mla_params.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/persistent.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/persistent.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/persistent_template.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/persistent_template.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/pod.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/pod.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/prefill.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/prefill.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/scheduler.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/scheduler.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/state.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/state.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/variant_helper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/variant_helper.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention/variants.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention/variants.cuh -------------------------------------------------------------------------------- /include/flashinfer/attention_impl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/attention_impl.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/trtllm_allreduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/trtllm_allreduce.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/trtllm_allreduce_fusion.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/trtllm_allreduce_fusion.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/trtllm_alltoall.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/trtllm_alltoall.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/trtllm_alltoall_prepare.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/trtllm_alltoall_prepare.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/trtllm_mnnvl_allreduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/trtllm_mnnvl_allreduce.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/trtllm_moe_allreduce_fusion.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/trtllm_moe_allreduce_fusion.cuh -------------------------------------------------------------------------------- /include/flashinfer/comm/vllm_custom_all_reduce.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/comm/vllm_custom_all_reduce.cuh -------------------------------------------------------------------------------- /include/flashinfer/cp_async.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/cp_async.cuh -------------------------------------------------------------------------------- /include/flashinfer/cubin_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/cubin_loader.h -------------------------------------------------------------------------------- /include/flashinfer/cutlass_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/cutlass_utils.cuh -------------------------------------------------------------------------------- /include/flashinfer/exception.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/exception.h -------------------------------------------------------------------------------- /include/flashinfer/fastdiv.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/fastdiv.cuh -------------------------------------------------------------------------------- /include/flashinfer/fp16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/fp16.h -------------------------------------------------------------------------------- /include/flashinfer/fp4_layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/fp4_layout.cuh -------------------------------------------------------------------------------- /include/flashinfer/frag_layout_swizzle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/frag_layout_swizzle.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/bmm_fp8.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/bmm_fp8.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/cutlass_gemm_configs.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/cutlass_gemm_configs.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/dsv3_router_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/dsv3_router_gemm.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp4_gemm_cutlass.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp4_gemm_cutlass.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp4_gemm_cutlass_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp4_gemm_cutlass_template.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp4_gemm_cutlass_template_sm120.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp4_gemm_cutlass_template_sm120.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp4_gemm_template_sm100.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp4_gemm_template_sm100.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp4_gemm_template_sm120.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp4_gemm_template_sm120.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp8_gemm_cutlass.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp8_gemm_cutlass.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp8_gemm_cutlass_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp8_gemm_cutlass_template.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/fp8_gemm_template_sm100.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/fp8_gemm_template_sm100.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/gemm_groupwise_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/gemm_groupwise_sm100.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/gemm_groupwise_sm120.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/gemm_groupwise_sm120.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemm.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemm_fp8_groupwise_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemm_fp8_groupwise_sm100.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemm_fp8_groupwise_sm120.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemm_fp8_groupwise_sm120.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemm_lora.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemm_lora.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemm_mxfp4_groupwise_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemm_mxfp4_groupwise_sm100.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemm_sm90.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemm_sm90.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/group_gemv.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/group_gemv.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/tgv_gemm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/tgv_gemm.cuh -------------------------------------------------------------------------------- /include/flashinfer/gemm/tgv_gemm_configs.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/tgv_gemm_configs.h -------------------------------------------------------------------------------- /include/flashinfer/gemm/tgv_gemm_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/gemm/tgv_gemm_template.h -------------------------------------------------------------------------------- /include/flashinfer/layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/layout.cuh -------------------------------------------------------------------------------- /include/flashinfer/logging.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/logging.h -------------------------------------------------------------------------------- /include/flashinfer/math.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/math.cuh -------------------------------------------------------------------------------- /include/flashinfer/mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/mma.cuh -------------------------------------------------------------------------------- /include/flashinfer/norm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/norm.cuh -------------------------------------------------------------------------------- /include/flashinfer/page.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/page.cuh -------------------------------------------------------------------------------- /include/flashinfer/permuted_smem.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/permuted_smem.cuh -------------------------------------------------------------------------------- /include/flashinfer/pos_enc.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/pos_enc.cuh -------------------------------------------------------------------------------- /include/flashinfer/profiler.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/profiler.cuh -------------------------------------------------------------------------------- /include/flashinfer/quantization.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/quantization.cuh -------------------------------------------------------------------------------- /include/flashinfer/sampling.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/sampling.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/KernelRunner.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/KernelRunner.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/BatchedGemmEnums.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/BatchedGemmEnums.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/Enums.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/Enums.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/GemmOptions.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/GemmOptions.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/KernelParams.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/KernelParams.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/KernelParamsDecl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/KernelParamsDecl.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/KernelTraits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/KernelTraits.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/TmaDescriptor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/TmaDescriptor.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common/cudaBf16Fallbacks.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common/cudaBf16Fallbacks.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common/cudaBf16Wrapper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common/cudaBf16Wrapper.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common/cudaFp8Utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common/cudaFp8Utils.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common/cudaTypeUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common/cudaTypeUtils.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common/cudaUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common/cudaUtils.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/common/reduceKernelUtils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/common/reduceKernelUtils.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/decoder_impl_common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/decoder_impl_common.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/decoder_params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/decoder_params.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/fmhaKernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/fmhaKernels.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/fmhaReduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/fmhaReduction.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/fmhaRunner.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/fmhaRunner.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/fmhaRunnerParams.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/fmhaRunnerParams.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/kernelParams.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/kernelParams.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/kernelUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/kernelUtils.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fmha/lse.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fmha/lse.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/DevKernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/DevKernel.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/IntFastDiv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/IntFastDiv.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/RoutingKernel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/RoutingKernel.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/RoutingKernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/RoutingKernel.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/RoutingKernelTopK.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/RoutingKernelTopK.cuh -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/noAuxTcKernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/noAuxTcKernels.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/fused_moe/runner.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/fused_moe/runner.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/Enums.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/Enums.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/GemmInterface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/GemmInterface.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/GemmOptions.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/GemmOptions.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/KernelParams.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/KernelParams.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/KernelParamsDecl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/KernelParamsDecl.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/KernelTraits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/KernelTraits.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/TmaDescriptor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/TmaDescriptor.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/CommonUtils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/CommonUtils.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/DtypeDecl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/DtypeDecl.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/MmaDecl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/MmaDecl.h -------------------------------------------------------------------------------- /include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/SfLayoutDecl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/trtllm/gemm/trtllmGen_gemm_export/trtllm/gen/SfLayoutDecl.h -------------------------------------------------------------------------------- /include/flashinfer/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/utils.cuh -------------------------------------------------------------------------------- /include/flashinfer/vec_dtypes.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/include/flashinfer/vec_dtypes.cuh -------------------------------------------------------------------------------- /licenses/LICENSE.cutlass.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/licenses/LICENSE.cutlass.txt -------------------------------------------------------------------------------- /licenses/LICENSE.flashattention3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/licenses/LICENSE.flashattention3.txt -------------------------------------------------------------------------------- /licenses/LICENSE.fmt.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/licenses/LICENSE.fmt.txt -------------------------------------------------------------------------------- /licenses/LICENSE.spdlog.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/licenses/LICENSE.spdlog.txt -------------------------------------------------------------------------------- /profiler/.gitignore: -------------------------------------------------------------------------------- 1 | *.perfetto-trace 2 | -------------------------------------------------------------------------------- /profiler/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/profiler/README.md -------------------------------------------------------------------------------- /profiler/batch_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/profiler/batch_attention.py -------------------------------------------------------------------------------- /profiler/mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/profiler/mla.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/pyproject.toml -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/pytest.ini -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/authorized_codeowner.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/authorized_codeowner.txt -------------------------------------------------------------------------------- /scripts/build_flashinfer_jit_cache_whl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/build_flashinfer_jit_cache_whl.sh -------------------------------------------------------------------------------- /scripts/ci-flashinfer.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/ci-flashinfer.env.example -------------------------------------------------------------------------------- /scripts/ci-flashinfer.service: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/ci-flashinfer.service -------------------------------------------------------------------------------- /scripts/codeowner_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/codeowner_analyzer.py -------------------------------------------------------------------------------- /scripts/print_jit_cache_summary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/print_jit_cache_summary.py -------------------------------------------------------------------------------- /scripts/task_cpplint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_cpplint.sh -------------------------------------------------------------------------------- /scripts/task_jit_run_tests_part1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_jit_run_tests_part1.sh -------------------------------------------------------------------------------- /scripts/task_jit_run_tests_part2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_jit_run_tests_part2.sh -------------------------------------------------------------------------------- /scripts/task_jit_run_tests_part3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_jit_run_tests_part3.sh -------------------------------------------------------------------------------- /scripts/task_jit_run_tests_part4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_jit_run_tests_part4.sh -------------------------------------------------------------------------------- /scripts/task_jit_run_tests_part5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_jit_run_tests_part5.sh -------------------------------------------------------------------------------- /scripts/task_lint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_lint.sh -------------------------------------------------------------------------------- /scripts/task_mypy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_mypy.sh -------------------------------------------------------------------------------- /scripts/task_pylint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_pylint.sh -------------------------------------------------------------------------------- /scripts/task_show_node_info.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_show_node_info.sh -------------------------------------------------------------------------------- /scripts/task_test_blackwell_kernels.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_test_blackwell_kernels.sh -------------------------------------------------------------------------------- /scripts/task_test_jit_cache_package_build_import.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_test_jit_cache_package_build_import.sh -------------------------------------------------------------------------------- /scripts/task_test_multi_node_comm_kernels.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_test_multi_node_comm_kernels.sh -------------------------------------------------------------------------------- /scripts/task_test_nightly_build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_test_nightly_build.sh -------------------------------------------------------------------------------- /scripts/task_test_single_node_comm_kernels.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/task_test_single_node_comm_kernels.sh -------------------------------------------------------------------------------- /scripts/update_whl_index.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/update_whl_index.py -------------------------------------------------------------------------------- /scripts/verify_all_modules_compiled.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/scripts/verify_all_modules_compiled.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # This file makes the tests directory a Python package 2 | -------------------------------------------------------------------------------- /tests/attention/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/attention/test_alibi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_alibi.py -------------------------------------------------------------------------------- /tests/attention/test_attention_sink.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_attention_sink.py -------------------------------------------------------------------------------- /tests/attention/test_attention_sink_blackwell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_attention_sink_blackwell.py -------------------------------------------------------------------------------- /tests/attention/test_batch_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_batch_attention.py -------------------------------------------------------------------------------- /tests/attention/test_batch_decode_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_batch_decode_kernels.py -------------------------------------------------------------------------------- /tests/attention/test_batch_invariant_fa2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_batch_invariant_fa2.py -------------------------------------------------------------------------------- /tests/attention/test_batch_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_batch_prefill.py -------------------------------------------------------------------------------- /tests/attention/test_batch_prefill_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_batch_prefill_kernels.py -------------------------------------------------------------------------------- /tests/attention/test_blackwell_fmha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_blackwell_fmha.py -------------------------------------------------------------------------------- /tests/attention/test_cudnn_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_cudnn_decode.py -------------------------------------------------------------------------------- /tests/attention/test_cudnn_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_cudnn_prefill.py -------------------------------------------------------------------------------- /tests/attention/test_cudnn_prefill_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_cudnn_prefill_deepseek.py -------------------------------------------------------------------------------- /tests/attention/test_decode_fp8_calibration_scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_decode_fp8_calibration_scale.py -------------------------------------------------------------------------------- /tests/attention/test_decode_prefill_lse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_decode_prefill_lse.py -------------------------------------------------------------------------------- /tests/attention/test_deepseek_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_deepseek_mla.py -------------------------------------------------------------------------------- /tests/attention/test_fmha_v2_prefill_deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_fmha_v2_prefill_deepseek.py -------------------------------------------------------------------------------- /tests/attention/test_fp8_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_fp8_prefill.py -------------------------------------------------------------------------------- /tests/attention/test_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_hopper.py -------------------------------------------------------------------------------- /tests/attention/test_hopper_fp8_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_hopper_fp8_attention.py -------------------------------------------------------------------------------- /tests/attention/test_logits_cap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_logits_cap.py -------------------------------------------------------------------------------- /tests/attention/test_mla_decode_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_mla_decode_kernel.py -------------------------------------------------------------------------------- /tests/attention/test_mla_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_mla_page.py -------------------------------------------------------------------------------- /tests/attention/test_non_contiguous_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_non_contiguous_decode.py -------------------------------------------------------------------------------- /tests/attention/test_non_contiguous_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_non_contiguous_prefill.py -------------------------------------------------------------------------------- /tests/attention/test_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_page.py -------------------------------------------------------------------------------- /tests/attention/test_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_rope.py -------------------------------------------------------------------------------- /tests/attention/test_shared_prefix_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_shared_prefix_kernels.py -------------------------------------------------------------------------------- /tests/attention/test_single_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_single_prefill.py -------------------------------------------------------------------------------- /tests/attention/test_sliding_window.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_sliding_window.py -------------------------------------------------------------------------------- /tests/attention/test_tensor_cores_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_tensor_cores_decode.py -------------------------------------------------------------------------------- /tests/attention/test_trtllm_gen_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_trtllm_gen_attention.py -------------------------------------------------------------------------------- /tests/attention/test_trtllm_gen_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_trtllm_gen_mla.py -------------------------------------------------------------------------------- /tests/attention/test_trtllm_ragged_kv_stride.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_trtllm_ragged_kv_stride.py -------------------------------------------------------------------------------- /tests/attention/test_xqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_xqa.py -------------------------------------------------------------------------------- /tests/attention/test_xqa_batch_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_xqa_batch_decode.py -------------------------------------------------------------------------------- /tests/attention/test_xqa_mla_batch_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/attention/test_xqa_mla_batch_decode.py -------------------------------------------------------------------------------- /tests/cli/test_cli_show_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/cli/test_cli_show_config.py -------------------------------------------------------------------------------- /tests/comm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/comm/test_mnnvl_custom_comm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_mnnvl_custom_comm.py -------------------------------------------------------------------------------- /tests/comm/test_mnnvl_memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_mnnvl_memory.py -------------------------------------------------------------------------------- /tests/comm/test_nvshmem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_nvshmem.py -------------------------------------------------------------------------------- /tests/comm/test_nvshmem_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_nvshmem_allreduce.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_allreduce.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_allreduce_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_allreduce_fusion.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_alltoall.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_alltoall.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_mnnvl_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_mnnvl_allreduce.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_mnnvl_allreduce_custom_comm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_mnnvl_allreduce_custom_comm.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_moe_allreduce_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_moe_allreduce_fusion.py -------------------------------------------------------------------------------- /tests/comm/test_trtllm_moe_allreduce_fusion_finalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_trtllm_moe_allreduce_fusion_finalize.py -------------------------------------------------------------------------------- /tests/comm/test_vllm_custom_allreduce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/comm/test_vllm_custom_allreduce.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/gemm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/gemm/test_bmm_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_bmm_fp8.py -------------------------------------------------------------------------------- /tests/gemm/test_cute_dsl_blockscaled_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_cute_dsl_blockscaled_gemm.py -------------------------------------------------------------------------------- /tests/gemm/test_cute_dsl_gemm_allreduce_two_shot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_cute_dsl_gemm_allreduce_two_shot.py -------------------------------------------------------------------------------- /tests/gemm/test_group_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_group_gemm.py -------------------------------------------------------------------------------- /tests/gemm/test_groupwise_scaled_gemm_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_groupwise_scaled_gemm_fp8.py -------------------------------------------------------------------------------- /tests/gemm/test_groupwise_scaled_gemm_mxfp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_groupwise_scaled_gemm_mxfp4.py -------------------------------------------------------------------------------- /tests/gemm/test_mm_fp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_mm_fp4.py -------------------------------------------------------------------------------- /tests/gemm/test_mm_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_mm_fp8.py -------------------------------------------------------------------------------- /tests/gemm/test_sm_constraint_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_sm_constraint_gemm.py -------------------------------------------------------------------------------- /tests/gemm/test_tgv_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/gemm/test_tgv_gemm.py -------------------------------------------------------------------------------- /tests/model_optimizations/test_dsv3_fused_routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/model_optimizations/test_dsv3_fused_routing.py -------------------------------------------------------------------------------- /tests/model_optimizations/test_dsv3_router_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/model_optimizations/test_dsv3_router_gemm.py -------------------------------------------------------------------------------- /tests/moe/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/moe/test_dpsk_fused_moe_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/moe/test_dpsk_fused_moe_fp8.py -------------------------------------------------------------------------------- /tests/moe/test_trtllm_cutlass_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/moe/test_trtllm_cutlass_fused_moe.py -------------------------------------------------------------------------------- /tests/moe/test_trtllm_gen_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/moe/test_trtllm_gen_fused_moe.py -------------------------------------------------------------------------------- /tests/moe/test_trtllm_gen_routed_fused_moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/moe/test_trtllm_gen_routed_fused_moe.py -------------------------------------------------------------------------------- /tests/moe/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/moe/utils.py -------------------------------------------------------------------------------- /tests/test_artifacts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_artifacts.py -------------------------------------------------------------------------------- /tests/test_helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_helpers/alibi_reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/alibi_reference.py -------------------------------------------------------------------------------- /tests/test_helpers/jit_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/jit_utils.py -------------------------------------------------------------------------------- /tests/test_helpers/params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/params.py -------------------------------------------------------------------------------- /tests/test_helpers/rope_reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/rope_reference.py -------------------------------------------------------------------------------- /tests/test_helpers/sink_attention_reference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/sink_attention_reference.py -------------------------------------------------------------------------------- /tests/test_helpers/test_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/test_helpers.py -------------------------------------------------------------------------------- /tests/test_helpers/utils_fp4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/test_helpers/utils_fp4.py -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/utils/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_activation.py -------------------------------------------------------------------------------- /tests/utils/test_block_sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_block_sparse.py -------------------------------------------------------------------------------- /tests/utils/test_block_sparse_indices_to_vector_sparse_offsets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_block_sparse_indices_to_vector_sparse_offsets.py -------------------------------------------------------------------------------- /tests/utils/test_create_ipc_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_create_ipc_buffer.py -------------------------------------------------------------------------------- /tests/utils/test_decorators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_decorators.py -------------------------------------------------------------------------------- /tests/utils/test_fp4_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_fp4_quantize.py -------------------------------------------------------------------------------- /tests/utils/test_fp4_tensor_torch_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_fp4_tensor_torch_cute.py -------------------------------------------------------------------------------- /tests/utils/test_fp8_quantize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_fp8_quantize.py -------------------------------------------------------------------------------- /tests/utils/test_green_ctx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_green_ctx.py -------------------------------------------------------------------------------- /tests/utils/test_jit_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_jit_example.py -------------------------------------------------------------------------------- /tests/utils/test_jit_warmup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_jit_warmup.py -------------------------------------------------------------------------------- /tests/utils/test_load_cubin_compile_race_condition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_load_cubin_compile_race_condition.py -------------------------------------------------------------------------------- /tests/utils/test_logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_logging.py -------------------------------------------------------------------------------- /tests/utils/test_logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_logits_processor.py -------------------------------------------------------------------------------- /tests/utils/test_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_norm.py -------------------------------------------------------------------------------- /tests/utils/test_pod_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_pod_kernels.py -------------------------------------------------------------------------------- /tests/utils/test_quantization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_quantization.py -------------------------------------------------------------------------------- /tests/utils/test_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_sampling.py -------------------------------------------------------------------------------- /tests/utils/test_triton_cascade.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils/test_triton_cascade.py -------------------------------------------------------------------------------- /tests/utils_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flashinfer-ai/flashinfer/HEAD/tests/utils_fp8.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.5.2 2 | --------------------------------------------------------------------------------