├── web
    ├── apps
    │   ├── docs
    │   │   ├── content
    │   │   ├── postcss.config.js
    │   │   ├── mdx-components.js
    │   │   ├── next.config.ts
    │   │   ├── tsconfig.json
    │   │   ├── package.json
    │   │   ├── app
    │   │   │   ├── [[...mdxPath]]
    │   │   │   │   └── page.jsx
    │   │   │   └── layout.jsx
    │   │   └── gen_api_docs.sh
    │   └── web
    │   │   ├── .eslintrc.json
    │   │   ├── lib
    │   │       ├── schemas
    │   │       │   ├── index.ts
    │   │       │   └── model.ts
    │   │       ├── model-utils.ts
    │   │       └── utils.ts
    │   │   ├── app
    │   │       ├── icon.png
    │   │       ├── kernels
    │   │       │   └── [name]
    │   │       │   │   ├── solutions-types.ts
    │   │       │   │   ├── constraints.tsx
    │   │       │   │   ├── header.tsx
    │   │       │   │   └── page.tsx
    │   │       ├── layout.tsx
    │   │       ├── models
    │   │       │   ├── page.tsx
    │   │       │   └── [id]
    │   │       │   │   ├── page.tsx
    │   │       │   │   └── model-tabs.tsx
    │   │       ├── models.tsx
    │   │       ├── page.tsx
    │   │       └── leaderboard
    │   │       │   └── section.tsx
    │   │   ├── postcss.config.js
    │   │   ├── components
    │   │       ├── layout
    │   │       │   ├── footer.tsx
    │   │       │   └── header.tsx
    │   │       ├── fast-p-label.tsx
    │   │       └── model-card.tsx
    │   │   ├── microfrontends.json.disabled
    │   │   ├── middleware.ts
    │   │   ├── tsconfig.json
    │   │   ├── package.json
    │   │   ├── next.config.ts
    │   │   └── data
    │   │       └── baselines.ts
    ├── pnpm-workspace.yaml
    ├── packages
    │   ├── ui
    │   │   ├── src
    │   │   │   ├── brand
    │   │   │   │   ├── fib_logo.png
    │   │   │   │   ├── fib_mark.png
    │   │   │   │   ├── fib-black-bg.png
    │   │   │   │   ├── fib-white-bg.png
    │   │   │   │   └── Logo.tsx
    │   │   │   ├── providers
    │   │   │   │   └── Providers.tsx
    │   │   │   ├── index.ts
    │   │   │   └── components
    │   │   │   │   ├── label.tsx
    │   │   │   │   ├── textarea.tsx
    │   │   │   │   ├── input.tsx
    │   │   │   │   ├── hover-card.tsx
    │   │   │   │   ├── badge.tsx
    │   │   │   │   ├── avatar.tsx
    │   │   │   │   ├── alert.tsx
    │   │   │   │   ├── progress-circle.tsx
    │   │   │   │   ├── button.tsx
    │   │   │   │   ├── tabs.tsx
    │   │   │   │   ├── card.tsx
    │   │   │   │   ├── site-footer.tsx
    │   │   │   │   ├── site-header.tsx
    │   │   │   │   ├── toaster.tsx
    │   │   │   │   └── table.tsx
    │   │   ├── tsconfig.json
    │   │   └── package.json
    │   ├── utils
    │   │   ├── src
    │   │   │   └── index.ts
    │   │   ├── tsconfig.json
    │   │   └── package.json
    │   └── config
    │   │   ├── package.json
    │   │   ├── tsconfig.json
    │   │   └── src
    │   │       └── index.ts
    ├── package.json
    ├── turbo.json
    ├── README.md
    └── .gitignore
├── flashinfer_bench
    ├── integration
    │   ├── __init__.py
    │   ├── flashinfer
    │   │   ├── adapters
    │   │   │   ├── __init__.py
    │   │   │   └── rmsnorm.py
    │   │   ├── __init__.py
    │   │   └── common.py
    │   ├── utils.py
    │   └── patch_manager.py
    ├── cli
    │   └── __init__.py
    ├── bench
    │   ├── __init__.py
    │   ├── evaluators
    │   │   ├── __init__.py
    │   │   ├── registry.py
    │   │   ├── utils.py
    │   │   └── evaluator.py
    │   ├── runner
    │   │   ├── __init__.py
    │   │   └── runner.py
    │   └── config.py
    ├── compile
    │   ├── builders
    │   │   ├── __init__.py
    │   │   └── triton_builder.py
    │   ├── __init__.py
    │   └── utils.py
    ├── apply
    │   ├── __init__.py
    │   ├── config.py
    │   └── key.py
    ├── data
    │   ├── utils.py
    │   ├── __init__.py
    │   └── workload.py
    ├── tracing
    │   ├── workload_entry.py
    │   ├── __init__.py
    │   └── builtin
    │   │   └── configs.py
    ├── logging.py
    ├── __init__.py
    └── env.py
├── tests
    ├── __init__.py
    ├── integration
    │   ├── samplemods
    │   │   ├── __init__.py
    │   │   └── pm_dummy.py
    │   ├── __init__.py
    │   ├── test_utils.py
    │   └── test_patch_manager.py
    ├── bench
    │   └── test_benchmark_config.py
    ├── conftest.py
    ├── test_logging.py
    ├── apply
    │   └── test_key.py
    └── compile
    │   ├── test_triton_builder.py
    │   ├── test_python_builder.py
    │   └── test_utils.py
├── NOTICE
├── .gitmodules
├── docs
    ├── api
    │   ├── _static
    │   │   └── brand
    │   │   │   ├── fib-dark.png
    │   │   │   └── fib-light.png
    │   ├── requirements.txt
    │   ├── rst
    │   │   ├── schema_traceset.md
    │   │   ├── schema_solution.md
    │   │   ├── schema_definition.md
    │   │   ├── schema_trace.md
    │   │   ├── schema_workload.md
    │   │   ├── apply.md
    │   │   ├── tracing.md
    │   │   ├── schema.md
    │   │   └── compile.md
    │   ├── index.md
    │   └── build_docs.sh
    ├── start
    │   ├── installation.mdx
    │   └── quick_start.mdx
    ├── op_type_schema
    │   ├── gemm.md
    │   ├── rmsnorm.md
    │   ├── gqa_ragged.md
    │   ├── sampling.md
    │   ├── moe.md
    │   ├── gqa_paged.md
    │   └── mla_paged.md
    ├── index.mdx
    └── flashinfer_trace
    │   └── flashinfer_trace.md
├── examples
    ├── res
    │   └── win_at_p_curve_gemm_o3_gpt-5.png
    └── kernel_generator
    │   ├── .env.example
    │   └── README.md
├── .yamlfmt
├── scripts
    └── linting.sh
├── .github
    └── workflows
    │   ├── linting.yaml
    │   ├── unit_test.yaml
    │   └── build-and-upload-pypi.yml
├── RELEASE.md
├── .gitignore
├── .pre-commit-config.yaml
├── licenses
    └── cutlass.LICENSE.txt
├── README.md
├── pyproject.toml
└── CONTRIBUTING.md


/web/apps/docs/content:
--------------------------------------------------------------------------------
1 | ../../../docs


--------------------------------------------------------------------------------
/flashinfer_bench/integration/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Test package for flashinfer_bench."""
2 | 


--------------------------------------------------------------------------------
/web/pnpm-workspace.yaml:
--------------------------------------------------------------------------------
1 | packages:
2 |   - "apps/*"
3 |   - "packages/*"
4 | 


--------------------------------------------------------------------------------
/web/apps/web/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "next/core-web-vitals"
3 | }
4 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | FlashInfer-Bench
2 | 
3 | Copyright (c) 2025 by FlashInfer-Bench Contributors
4 | 


--------------------------------------------------------------------------------
/flashinfer_bench/cli/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import cli
2 | 
3 | __all__ = ["cli"]
4 | 


--------------------------------------------------------------------------------
/tests/integration/samplemods/__init__.py:
--------------------------------------------------------------------------------
1 | """Sample modules for integration tests."""
2 | 


--------------------------------------------------------------------------------
/web/apps/web/lib/schemas/index.ts:
--------------------------------------------------------------------------------
1 | export * from "./model"
2 | export * from "./trace"
3 | 


--------------------------------------------------------------------------------
/flashinfer_bench/integration/flashinfer/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | """Adapters for flashinfer integration."""
2 | 


--------------------------------------------------------------------------------
/tests/integration/__init__.py:
--------------------------------------------------------------------------------
1 | # Mark tests/integration as a package to allow 'tests.integration.*' imports
2 | 


--------------------------------------------------------------------------------
/web/apps/web/app/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/web/apps/web/app/icon.png


--------------------------------------------------------------------------------
/web/apps/docs/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   plugins: {
3 |     '@tailwindcss/postcss': {},
4 |   },
5 | }
6 | 


--------------------------------------------------------------------------------
/web/apps/web/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   plugins: {
3 |     '@tailwindcss/postcss': {},
4 |   },
5 | }
6 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "thirdparty/cutlass"]
2 | 	path = thirdparty/cutlass
3 | 	url = https://github.com/NVIDIA/cutlass
4 | 


--------------------------------------------------------------------------------
/docs/api/_static/brand/fib-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/docs/api/_static/brand/fib-dark.png


--------------------------------------------------------------------------------
/docs/api/_static/brand/fib-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/docs/api/_static/brand/fib-light.png


--------------------------------------------------------------------------------
/web/packages/ui/src/brand/fib_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/web/packages/ui/src/brand/fib_logo.png


--------------------------------------------------------------------------------
/web/packages/ui/src/brand/fib_mark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/web/packages/ui/src/brand/fib_mark.png


--------------------------------------------------------------------------------
/web/packages/ui/src/brand/fib-black-bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/web/packages/ui/src/brand/fib-black-bg.png


--------------------------------------------------------------------------------
/web/packages/ui/src/brand/fib-white-bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/web/packages/ui/src/brand/fib-white-bg.png


--------------------------------------------------------------------------------
/examples/res/win_at_p_curve_gemm_o3_gpt-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flashinfer-ai/flashinfer-bench/HEAD/examples/res/win_at_p_curve_gemm_o3_gpt-5.png


--------------------------------------------------------------------------------
/web/apps/web/components/layout/footer.tsx:
--------------------------------------------------------------------------------
1 | import { SiteFooter } from "@flashinfer-bench/ui"
2 | 
3 | export function Footer() {
4 |   return <SiteFooter />
5 | }
6 | 


--------------------------------------------------------------------------------
/tests/integration/samplemods/pm_dummy.py:
--------------------------------------------------------------------------------
1 | class Foo:
2 |     def instance_method(self, x, y=2):
3 |         return x + y
4 | 
5 | 
6 | def module_function(a, b=3):
7 |     return a * b
8 | 


--------------------------------------------------------------------------------
/docs/api/requirements.txt:
--------------------------------------------------------------------------------
1 | autodoc_pydantic>=2.1,<3
2 | myst-parser[linkify]>=4,<5
3 | setuptools-scm>=8
4 | shibuya
5 | sphinx==7.4.*
6 | sphinx-tabs>=3.4.7
7 | sphinx-toolbox
8 | tomli
9 | 


--------------------------------------------------------------------------------
/.yamlfmt:
--------------------------------------------------------------------------------
1 | formatter:
2 |   indent: 2
3 |   retain_line_breaks_single: true
4 |   max_line_length: 100
5 |   # avoid replacing newline with #magic___^_^___line
6 |   scan_folded_as_literal: true
7 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | 
3 | from .benchmark import Benchmark
4 | from .config import BenchmarkConfig
5 | 
6 | __all__ = ["Benchmark", "BenchmarkConfig"]
7 | 


--------------------------------------------------------------------------------
/web/packages/utils/src/index.ts:
--------------------------------------------------------------------------------
1 | import { type ClassValue, clsx } from "clsx"
2 | import { twMerge } from "tailwind-merge"
3 | 
4 | export function cn(...inputs: ClassValue[]) {
5 |   return twMerge(clsx(inputs))
6 | }
7 | 


--------------------------------------------------------------------------------
/docs/api/rst/schema_traceset.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.data.TraceSet
 2 | 
 3 | ```{eval-rst}
 4 | .. currentmodule:: flashinfer_bench.data
 5 | 
 6 | .. autoclass:: TraceSet
 7 |     :members:
 8 |     :exclude-members: __init__
 9 | ```
10 | 


--------------------------------------------------------------------------------
/scripts/linting.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -eo pipefail
 3 | set -x
 4 | echo "Linting..."
 5 | 
 6 | # Check if ruff is available, if not install it
 7 | if ! command -v ruff &> /dev/null; then
 8 |     echo "ruff not found, installing ruff..." && pip install ruff
 9 | fi
10 | 
11 | ruff check . --fix
12 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/evaluators/__init__.py:
--------------------------------------------------------------------------------
1 | from .default import DefaultEvaluator
2 | from .lowbit import LowBitEvaluator
3 | from .registry import resolve_evaluator
4 | from .sampling import SamplingEvaluator
5 | 
6 | __all__ = ["DefaultEvaluator", "LowBitEvaluator", "SamplingEvaluator", "resolve_evaluator"]
7 | 


--------------------------------------------------------------------------------
/docs/api/rst/schema_solution.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.data.Solution
 2 | 
 3 | ```{eval-rst}
 4 | .. currentmodule:: flashinfer_bench.data
 5 | 
 6 | .. autopydantic_model:: Solution
 7 | 
 8 | .. autoclass:: SupportedLanguages
 9 |     :members:
10 | 
11 | .. autopydantic_model:: SourceFile
12 | 
13 | .. autopydantic_model:: BuildSpec
14 | ```
15 | 


--------------------------------------------------------------------------------
/web/apps/docs/mdx-components.js:
--------------------------------------------------------------------------------
 1 | import { useMDXComponents as getThemeComponents } from 'nextra-theme-docs'
 2 | 
 3 | // Get the default MDX components
 4 | const themeComponents = getThemeComponents()
 5 | 
 6 | // Merge components
 7 | export function useMDXComponents(components) {
 8 |   return {
 9 |     ...themeComponents,
10 |     ...components
11 |   }
12 | }
13 | 


--------------------------------------------------------------------------------
/web/apps/web/components/layout/header.tsx:
--------------------------------------------------------------------------------
 1 | import { SiteHeader } from "@flashinfer-bench/ui"
 2 | import { docsBasePath } from "@flashinfer-bench/config"
 3 | 
 4 | const NAV_ITEMS = [
 5 |   { href: docsBasePath, label: "Docs", external: true },
 6 |   { href: "/viewer", label: "Viewer" },
 7 | ]
 8 | 
 9 | export function Header() {
10 |   return <SiteHeader navItems={NAV_ITEMS} />
11 | }
12 | 


--------------------------------------------------------------------------------
/web/packages/config/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@flashinfer-bench/config",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "main": "src/index.ts",
 6 |   "types": "src/index.ts",
 7 |   "license": "UNLICENSED",
 8 |   "sideEffects": false,
 9 |   "exports": {
10 |     ".": {
11 |       "types": "./src/index.ts",
12 |       "import": "./src/index.ts"
13 |     }
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/flashinfer_bench/compile/builders/__init__.py:
--------------------------------------------------------------------------------
1 | """Concrete builder implementations for different languages and build systems."""
2 | 
3 | from .python_builder import PythonBuilder
4 | from .torch_builder import TorchBuilder
5 | from .triton_builder import TritonBuilder
6 | from .tvm_ffi_builder import TVMFFIBuilder
7 | 
8 | __all__ = ["TorchBuilder", "PythonBuilder", "TritonBuilder", "TVMFFIBuilder"]
9 | 


--------------------------------------------------------------------------------
/docs/api/rst/schema_definition.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.data.Definition
 2 | 
 3 | ```{eval-rst}
 4 | .. currentmodule:: flashinfer_bench.data
 5 | 
 6 | .. autopydantic_model:: Definition
 7 | 
 8 | .. autopydantic_model:: AxisConst
 9 | 
10 | .. autopydantic_model:: AxisVar
11 | 
12 | .. autopydantic_model:: TensorSpec
13 | 
14 | .. autoclass:: flashinfer_bench.data.definition.DType
15 |     :members:
16 | ```
17 | 


--------------------------------------------------------------------------------
/web/packages/config/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "composite": false,
 4 |     "declaration": false,
 5 |     "skipLibCheck": true,
 6 |     "module": "ESNext",
 7 |     "moduleResolution": "Bundler",
 8 |     "jsx": "preserve",
 9 |     "target": "ES2022",
10 |     "strict": true,
11 |     "noEmit": true,
12 |     "isolatedModules": true
13 |   },
14 |   "include": ["src/**/*"]
15 | }
16 | 


--------------------------------------------------------------------------------
/web/packages/utils/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "composite": false,
 4 |     "declaration": false,
 5 |     "skipLibCheck": true,
 6 |     "module": "ESNext",
 7 |     "moduleResolution": "Bundler",
 8 |     "jsx": "preserve",
 9 |     "target": "ES2022",
10 |     "strict": true,
11 |     "noEmit": true,
12 |     "isolatedModules": true
13 |   },
14 |   "include": ["src/**/*"]
15 | }
16 | 


--------------------------------------------------------------------------------
/flashinfer_bench/apply/__init__.py:
--------------------------------------------------------------------------------
 1 | from .apply_api import apply, disable_apply, enable_apply
 2 | from .config import ApplyConfig
 3 | from .runtime import ApplyRuntime, get_apply_runtime, set_apply_runtime
 4 | 
 5 | __all__ = [
 6 |     "apply",
 7 |     "disable_apply",
 8 |     "enable_apply",
 9 |     "get_apply_runtime",
10 |     "set_apply_runtime",
11 |     "ApplyConfig",
12 |     "ApplyRuntime",
13 | ]
14 | 


--------------------------------------------------------------------------------
/docs/api/rst/schema_trace.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.data.Trace
 2 | 
 3 | ```{eval-rst}
 4 | .. currentmodule:: flashinfer_bench.data
 5 | 
 6 | .. autopydantic_model:: Trace
 7 | 
 8 | .. autopydantic_model:: Correctness
 9 | 
10 | .. autopydantic_model:: Performance
11 | 
12 | .. autopydantic_model:: Environment
13 | 
14 | .. autoclass:: EvaluationStatus
15 |     :members:
16 | 
17 | .. autopydantic_model:: Evaluation
18 | ```
19 | 


--------------------------------------------------------------------------------
/web/apps/docs/next.config.ts:
--------------------------------------------------------------------------------
 1 | import type { NextConfig } from 'next'
 2 | import nextra from 'nextra'
 3 | 
 4 | const withNextra = nextra({})
 5 | 
 6 | const config: NextConfig = {
 7 |   reactStrictMode: true,
 8 |   basePath: '/docs',
 9 |   transpilePackages: [
10 |     '@flashinfer-bench/ui',
11 |     '@flashinfer-bench/utils',
12 |     '@flashinfer-bench/config',
13 |   ],
14 | }
15 | 
16 | export default withNextra(config)
17 | 


--------------------------------------------------------------------------------
/docs/api/rst/schema_workload.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.data.Workload
 2 | 
 3 | ```{eval-rst}
 4 | .. currentmodule:: flashinfer_bench.data
 5 | 
 6 | .. autopydantic_model:: Workload
 7 | 
 8 | .. autopydantic_model:: RandomInput
 9 | 
10 | .. autopydantic_model:: ScalarInput
11 | 
12 | .. autopydantic_model:: SafetensorsInput
13 | 
14 | .. autodata:: InputSpec
15 | 
16 |    Union type representing all possible input specification types.
17 | ```
18 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/runner/__init__.py:
--------------------------------------------------------------------------------
 1 | from .isolated_runner import IsolatedRunner
 2 | from .persistent_runner import PersistentRunner
 3 | from .runner import BaselineHandle, DeviceBaseline, RunnerError, RunnerFatalError
 4 | 
 5 | __all__ = [
 6 |     # General Runner
 7 |     "BaselineHandle",
 8 |     "DeviceBaseline",
 9 |     "RunnerError",
10 |     "RunnerFatalError",
11 |     # Specialized Runners
12 |     "IsolatedRunner",
13 |     "PersistentRunner",
14 | ]
15 | 


--------------------------------------------------------------------------------
/web/packages/utils/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@flashinfer-bench/utils",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "main": "src/index.ts",
 6 |   "types": "src/index.ts",
 7 |   "license": "UNLICENSED",
 8 |   "sideEffects": false,
 9 |   "exports": {
10 |     ".": {
11 |       "types": "./src/index.ts",
12 |       "import": "./src/index.ts"
13 |     }
14 |   },
15 |   "dependencies": {
16 |     "clsx": "^2.1.1",
17 |     "tailwind-merge": "^3.3.1"
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/docs/api/rst/apply.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.apply
 2 | 
 3 | `flashinfer_bench.apply` provides a tool that meets two needs:
 4 | 
 5 | 1. **Apply** the best-performing kernel from the FlashInfer Trace database to the LLM engine
 6 | 2. **Trace** the kernels in the LLM engine and dump their inputs in FlashInfer Trace's workload format
 7 | 
 8 | ```{eval-rst}
 9 | .. currentmodule:: flashinfer_bench
10 | 
11 | .. autofunction:: apply
12 | 
13 | .. autofunction:: enable_apply
14 | 
15 | .. autofunction:: disable_apply
16 | ```
17 | 


--------------------------------------------------------------------------------
/web/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "flashinfer-bench",
 3 |   "private": true,
 4 |   "scripts": {
 5 |     "dev": "turbo run dev",
 6 |     "build": "turbo run build",
 7 |     "lint": "turbo run lint",
 8 |     "format": "prettier --write \"**/*.{ts,tsx,md}\""
 9 |   },
10 |   "devDependencies": {
11 |     "eslint": "^8",
12 |     "eslint-config-custom": "^0.0.0",
13 |     "prettier": "^3.6.2",
14 |     "turbo": "^2.5.4"
15 |   },
16 |   "packageManager": "pnpm@10.14.0",
17 |   "engines": {
18 |     "node": ">=22"
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/web/turbo.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://turbo.build/schema.json",
 3 |   "globalEnv": [
 4 |     "FLASHINFER_TRACE_PATH"
 5 |   ],
 6 |   "globalDependencies": ["**/.env.*local"],
 7 |   "tasks": {
 8 |     "build": {
 9 |       "dependsOn": ["^build"],
10 |       "outputs": [".next/**", "!.next/cache/**", "public/docs/**"]
11 |     },
12 |     "dev": {
13 |       "cache": false,
14 |       "persistent": true
15 |     },
16 |     "lint": {},
17 |     "type-check": {
18 |       "dependsOn": ["^build"]
19 |     }
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/.github/workflows/linting.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   workflow_dispatch:
 3 |   pull_request:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   linting:
10 |     name: Pre-check on Ubuntu-latest
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@v4
15 |         with:
16 |           submodules: recursive
17 | 
18 |       - name: Pre-commit
19 |         uses: pre-commit/action@v3.0.1
20 | 
21 |       - name: Linting
22 |         run: |
23 |           ./scripts/linting.sh
24 | 


--------------------------------------------------------------------------------
/web/README.md:
--------------------------------------------------------------------------------
 1 | # FlashInfer Hub
 2 | 
 3 | Community-driven ecosystem for high-performance kernels.
 4 | 
 5 | ## Getting Started
 6 | 
 7 | 1. Install dependencies:
 8 | ```bash
 9 | pnpm install
10 | ```
11 | 
12 | 2. Start the development server:
13 | ```bash
14 | pnpm dev
15 | ```
16 | 
17 | 3. Open [http://localhost:3000](http://localhost:3000) in your browser.
18 | 
19 | ## Environment Variables
20 | 
21 | Create a `.env.local` file in `apps/web/` that sets `FLASHINFER_TRACE_PATH` (the environment variable declared under `globalEnv` in `turbo.json`).
22 | 
23 | ## Project Structure
24 | 
25 | - `apps/web/` - Next.js web application
26 | 


--------------------------------------------------------------------------------
/examples/kernel_generator/.env.example:
--------------------------------------------------------------------------------
 1 | # API Configuration
 2 | # Copy this file to .env and fill in your actual API keys and URLs
 3 | # Keep only ONE of the configurations below, matching the model used by KernelGenerator; LLM_API_KEY and BASE_URL are repeated, so remove or comment out the others
 4 | 
 5 | # OpenAI API Configuration
 6 | LLM_API_KEY=your_openai_api_key_here
 7 | 
 8 | # Claude API Configuration
 9 | LLM_API_KEY=your_claude_api_key_here
10 | BASE_URL=https://api.anthropic.com/v1/
11 | 
12 | # Gemini API Configuration
13 | LLM_API_KEY=your_gemini_api_key_here
14 | BASE_URL=https://generativelanguage.googleapis.com/v1beta/
15 | 


--------------------------------------------------------------------------------
/web/apps/web/microfrontends.json.disabled:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "https://openapi.vercel.sh/microfrontends.json",
 3 |     "applications": {
 4 |       "flashinfer-bench": {
 5 |         "packageName": "@flashinfer-bench/web",
 6 |         "development": {
 7 |           "fallback": "bench.flashinfer.ai"
 8 |         }
 9 |       },
10 |       "flashinfer-bench-docs": {
11 |         "packageName": "@flashinfer-bench/docs",
12 |         "routing": [
13 |           {
14 |             "paths": ["/docs", "/docs/:path*"]
15 |           }
16 |         ]
17 |       }
18 |     }
19 |   }
20 | 


--------------------------------------------------------------------------------
/flashinfer_bench/data/utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Annotated
 2 | 
 3 | from pydantic import BaseModel, ConfigDict, Field
 4 | 
 5 | NonEmptyString = Annotated[str, Field(min_length=1)]
 6 | """Type alias for non-empty strings with minimum length of 1."""
 7 | 
 8 | NonNegativeInt = Annotated[int, Field(ge=0)]
 9 | """Type alias for non-negative integers."""
10 | 
11 | 
12 | class BaseModelWithDocstrings(BaseModel):
13 |     """Base model with the attribute docstrings being extracted to the model JSON schema."""
14 | 
15 |     model_config = ConfigDict(use_attribute_docstrings=True)
16 | 


--------------------------------------------------------------------------------
/web/apps/web/app/kernels/[name]/solutions-types.ts:
--------------------------------------------------------------------------------
 1 | import type { CurvePoint } from "@/lib/analytics"
 2 | 
 3 | export type CorrectnessStats = {
 4 |   total: number
 5 |   passed: number
 6 |   incorrect: number
 7 |   runtime_error: number
 8 |   other: number
 9 | }
10 | 
11 | export type CurvesPayload = {
12 |   nWorkloads: number
13 |   curves: Record<string, CurvePoint[]>
14 |   correctness: Record<string, CorrectnessStats>
15 | }
16 | 
17 | export type SolutionFiltersState = {
18 |   languages: string[]
19 |   authors: string[]
20 |   targets: string[]
21 |   search: string
22 | }
23 | 


--------------------------------------------------------------------------------
/web/apps/web/lib/model-utils.ts:
--------------------------------------------------------------------------------
 1 | import { Model } from "./schemas"
 2 | 
 3 | /**
 4 |  * Get children of a module
 5 |  */
 6 | export function getChildren(model: Model, moduleName: string): string[] {
 7 |   return Object.entries(model.modules)
 8 |     .filter(([_, module]) => module.parent === moduleName)
 9 |     .map(([name]) => name)
10 | }
11 | 
12 | /**
13 |  * Get root modules (modules with no parent)
14 |  */
15 | export function getRootModules(model: Model): string[] {
16 |   return Object.entries(model.modules)
17 |     .filter(([_, module]) => !module.parent)
18 |     .map(([name]) => name)
19 | }
20 | 


--------------------------------------------------------------------------------
/web/packages/ui/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "composite": false,
 4 |     "declaration": false,
 5 |     "skipLibCheck": true,
 6 |     "module": "ESNext",
 7 |     "moduleResolution": "Bundler",
 8 |     "jsx": "preserve",
 9 |     "target": "ES2022",
10 |     "strict": true,
11 |     "noEmit": true,
12 |     "isolatedModules": true,
13 |     "types": ["react", "react-dom"],
14 |     "baseUrl": ".",
15 |     "paths": {
16 |       "@flashinfer-bench/utils": ["../utils/src/index.ts"],
17 |       "@flashinfer-bench/config": ["../config/src/index.ts"]
18 |     }
19 |   },
20 |   "include": ["src/**/*"]
21 | }
22 | 


--------------------------------------------------------------------------------
/web/apps/web/components/fast-p-label.tsx:
--------------------------------------------------------------------------------
 1 | import { cn } from "@flashinfer-bench/utils"
 2 | 
 3 | export type FastPLabelProps = {
 4 |   className?: string // extra classes passed to cn() for the outer <span>
 5 |   value?: string | number // subscript content; FastPLabel shows "p" when this is undefined
 6 | }
 7 | 
 8 | export function FastPLabel({ className, value }: FastPLabelProps) {
 9 |   // Renders "fast" with a subscript: the given value, or a literal "p" when value is undefined.
10 |   const subscript = value === undefined ? "p" : String(value)
11 |   const label = `fast sub ${subscript}`
12 |   const classes = cn("inline-flex items-baseline gap-0.5", className)
13 |   return (
14 |     <span className={classes} aria-label={label}>
15 |       <span>fast</span>
16 |       <sub className="text-[0.65em] leading-none">{subscript}</sub>
17 |     </span>
18 |   )
19 | }
20 | 


--------------------------------------------------------------------------------
/web/apps/web/middleware.ts:
--------------------------------------------------------------------------------
 1 | import { NextResponse } from "next/server"
 2 | import type { NextRequest } from "next/server"
 3 | 
 4 | export function middleware(_request: NextRequest) {
 5 |   // Static leaderboard site: nothing is gated, so every matched request passes through.
 6 |   return NextResponse.next()
 7 | }
 8 | 
 9 | export const config = {
10 |   matcher: [
11 |     /*
12 |      * Match all request paths except:
13 |      * - _next/static (static files)
14 |      * - _next/image (image optimization files)
15 |      * - favicon.ico (favicon file)
16 |      * - public files
17 |      */
18 |     "/((?!_next/static|_next/image|favicon.ico|.*\\.(?:svg|png|jpg|jpeg|gif|webp)$).*)",
19 |   ],
20 | }
21 | 


--------------------------------------------------------------------------------
/web/packages/config/src/index.ts:
--------------------------------------------------------------------------------
 1 | export const siteName = 'FlashInfer-Bench'
 2 | export const siteDescription = 'AI for AI Infrastructure for Accelerating AI Deployment'
 3 | 
 4 | export const links = {
 5 |   org: 'https://github.com/flashinfer-ai',
 6 |   siteRepo: 'https://github.com/flashinfer-ai/flashinfer-bench',
 7 |   docsRepositoryBase: 'https://github.com/flashinfer-ai/flashinfer-bench/tree/main/docs',
 8 | }
 9 | 
10 | export const docsBasePath = '/docs'
11 | 
12 | export const env = {
13 |   docsOriginVar: 'DOCS_ORIGIN',
14 | }
15 | 
16 | export function getDefaultMetadata() {
17 |   // Site-wide default title/description; a fresh object is built on every call.
18 |   const title = siteName
19 |   const description = siteDescription
20 |   return { title, description }
21 | }
22 | 


--------------------------------------------------------------------------------
/docs/api/rst/tracing.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.tracing
 2 | 
 3 | `flashinfer_bench.tracing` provides tools for tracing kernel executions during LLM inference
 4 | and collecting workload traces for the FlashInfer Trace database. This module enables:
 5 | 
 6 | 1. **Workload Collection**: Capture kernel inputs and execution patterns during runtime
 7 | 2. **Configurable Tracing**: Control what data to collect and how to deduplicate or filter traces
 8 | 3. **Filter Policies**: Apply policies to reduce redundant traces and manage dataset size
 9 | 
10 | ```{eval-rst}
11 | .. currentmodule:: flashinfer_bench
12 | 
13 | .. autofunction:: enable_tracing
14 | 
15 | .. autofunction:: disable_tracing
16 | ```
17 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/providers/Providers.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
 4 | import { ThemeProvider } from "next-themes"
 5 | import { useState } from "react"
 6 | 
 7 | export function Providers({ children }: { children: React.ReactNode }) {
 8 |   // Lazy initializer: the QueryClient is constructed once, not on every render.
 9 |   const [queryClient] = useState(() => new QueryClient())
10 |   // ThemeProvider configured with attribute="class" and the system theme as the default.
11 |   const themedChildren = (
12 |     <ThemeProvider
13 |       attribute="class"
14 |       defaultTheme="system"
15 |       enableSystem
16 |       disableTransitionOnChange
17 |     >
18 |       {children}
19 |     </ThemeProvider>
20 |   )
21 |   return <QueryClientProvider client={queryClient}>{themedChildren}</QueryClientProvider>
22 | }
23 | 


--------------------------------------------------------------------------------
/docs/start/installation.mdx:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | ## Prerequisites
 4 | 
 5 | - OS: Linux
 6 | - Python: 3.10, 3.11, 3.12, 3.13
 7 | 
 8 | ## Python Package (WIP)
 9 | 
10 | FlashInfer-Bench is available via pip:
11 | 
12 | ```bash
13 | python3 -m pip install flashinfer-bench
14 | ```
15 | 
16 | ## Install from Source
17 | 
18 | You may want to install FlashInfer-Bench from source code for development purposes.
19 | 
20 | ```bash
21 | # Clone the FlashInfer-Bench repository and enter it
22 | git clone https://github.com/flashinfer-ai/flashinfer-bench.git
23 | cd flashinfer-bench
24 | # Install the Python package
25 | pip install -v -e .
26 | ```
27 | 
28 | ## Verify installation
29 | 
30 | ```python
31 | >>> import flashinfer_bench as flb
32 | >>> flb.__version__
33 | '0.0.1'
34 | ```
35 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/gemm.md:
--------------------------------------------------------------------------------
 1 | # gemm
 2 | 
 3 | General Matrix Multiplication (GEMM) operation that computes C = A × B^T. This is a fundamental linear algebra operation used in neural networks for layer computations, attention mechanisms, and other matrix transformations.
 4 | 
 5 | Variants:
 6 | - FP16 GEMM: Uses 16-bit floating point (FP16) inputs for A and B matrices
 7 | - FP8 GEMM: Uses 8-bit floating point (FP8) inputs for A and B matrices, with scaling factors to maintain numerical stability
 8 | 
 9 | Axes (3 dimensions):
10 | - `M`: variable
11 | - `N`, `K`: constant
12 | 
13 | Inputs (2 or 4 tensors):
14 | - `A`: [M, K]
15 | - `B`: [N, K]
16 | - Scaling factors for FP8 GEMM:
17 |     - `A_scale`: [M]
18 |     - `B_scale`: [N]
19 | 
20 | Outputs (1 tensor):
21 | - `C`: [M, N]
22 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/rmsnorm.md:
--------------------------------------------------------------------------------
 1 | # rmsnorm
 2 | 
 3 | Root Mean Square Layer Normalization (RMSNorm) is a normalization technique that normalizes the input by the root mean square of its elements.
 4 | 
 5 | Variants:
 6 | - Standard RMSNorm: basic RMS normalization that scales input by RMS and applies learned weight parameters
 7 | - Fused Add RMSNorm: adds residual connection before normalization in a single fused operation
 8 | 
 9 | Axes (2 dimensions):
10 | - `batch_size`: variable
11 | - `hidden_size`: constant
12 | 
13 | Inputs (2 or 3 tensors):
14 | - `hidden_states`: [batch_size, hidden_size]
15 | - `weight`: [hidden_size]
16 | - For Fused Add RMSNorm only:
17 |     - `residual`: [batch_size, hidden_size]
18 | 
19 | Outputs (1 tensor):
20 | - `output`: [batch_size, hidden_size]
21 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/index.ts:
--------------------------------------------------------------------------------
 1 | export * from './components/alert'
 2 | export * from './components/avatar'
 3 | export * from './components/badge'
 4 | export * from './components/button'
 5 | export * from './components/card'
 6 | export * from './components/dropdown-menu'
 7 | export * from './components/hover-card'
 8 | export * from './components/input'
 9 | export * from './components/label'
10 | export * from './components/select'
11 | export * from './components/table'
12 | export * from './components/tabs'
13 | export * from './components/textarea'
14 | export * from './components/progress-circle'
15 | export * from './components/toaster'
16 | 
17 | export * from './providers/Providers'
18 | export * from './components/site-header'
19 | export * from './components/site-footer'
20 | 


--------------------------------------------------------------------------------
/tests/bench/test_benchmark_config.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import pytest
 4 | 
 5 | from flashinfer_bench.bench import BenchmarkConfig
 6 | 
 7 | 
 8 | def test_benchmark_config_defaults_valid():
 9 |     cfg = BenchmarkConfig()
10 |     assert cfg.warmup_runs >= 0
11 |     assert cfg.iterations > 0
12 |     assert cfg.num_trials > 0
13 |     assert cfg.rtol > 0 and cfg.atol > 0
14 | 
15 | 
16 | @pytest.mark.parametrize(
17 |     "field, value",
18 |     [("warmup_runs", -1), ("iterations", 0), ("num_trials", 0), ("rtol", 0.0), ("atol", 0.0)],
19 | )
20 | def test_benchmark_config_validation(field, value):
21 |     kwargs = {}
22 |     kwargs[field] = value
23 |     with pytest.raises(ValueError):
24 |         BenchmarkConfig(**kwargs)
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     pytest.main(sys.argv)
29 | 


--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
 1 | # Release Process
 2 | 
 3 | This project uses `setuptools_scm` for automatic version management from git tags.
 4 | 
 5 | ## Workflow
 6 | 
 7 | ### Option 1: GitHub GUI (Recommended)
 8 | 
 9 | 1. Go to [GitHub Releases](https://github.com/flashinfer-ai/flashinfer-bench/releases)
10 | 2. Click "Draft a new release"
11 | 3. Click "Choose a tag" → Type tag name (e.g., `v0.1.0`) → "Create new tag on publish"
12 | 4. Fill in release notes
13 | 5. Click "Publish release"
14 | 6. The package is then published to PyPI automatically
15 | 
16 | ### Option 2: Command Line
17 | 
18 | ```bash
19 | # Create and push tag
20 | git tag v0.1.0rc1
21 | git push origin v0.1.0rc1
22 | 
23 | # Then create the GitHub Release manually
24 | # The package is then published to PyPI automatically
25 | ```
26 | 
27 | ## Version Format
28 | 
29 | - `v0.1.0` - Stable
30 | - `v0.1.0rc1` - Release candidate
31 | 


--------------------------------------------------------------------------------
/docs/api/index.md:
--------------------------------------------------------------------------------
 1 | # Welcome to FlashInfer-Bench Python API documentation!
 2 | 
 3 | [Blog Post](https://flashinfer.ai/2025/10/21/flashinfer-bench.html) | [GitHub](https://github.com/flashinfer-ai/flashinfer-bench/) | [Slack (Join channel #flashinfer-bench)](https://join.slack.com/t/flashinfer/shared_invite/zt-379wct3hc-D5jR~1ZKQcU00WHsXhgvtA)
 4 | 
 5 | FlashInfer-Bench is a comprehensive benchmark and infrastructure designed to create a "virtuous cycle" where AI can automatically optimize and improve the core GPU kernels of the AI systems it runs on. It provides a systematic framework to identify performance bottlenecks, generate solutions, and deploy them immediately into production.
 6 | 
 7 | ```{toctree}
 8 | :maxdepth: 2
 9 | :caption: API Reference
10 | 
11 | rst/schema
12 | rst/apply
13 | rst/tracing
14 | rst/compile
15 | ```
16 | 


--------------------------------------------------------------------------------
/web/apps/docs/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2022",
 4 |     "module": "ESNext",
 5 |     "jsx": "preserve",
 6 |     "strict": true,
 7 |     "baseUrl": ".",
 8 |     "skipLibCheck": true,
 9 |     "esModuleInterop": true,
10 |     "moduleResolution": "bundler",
11 |     "resolveJsonModule": true,
12 |     "allowJs": true,
13 |     "noEmit": true,
14 |     "lib": [
15 |       "dom",
16 |       "dom.iterable",
17 |       "esnext"
18 |     ],
19 |     "incremental": true,
20 |     "isolatedModules": true,
21 |     "plugins": [
22 |       {
23 |         "name": "next"
24 |       }
25 |     ]
26 |   },
27 |   "include": [
28 |     "**/*.md",
29 |     "**/*.mdx",
30 |     "**/*.ts",
31 |     "**/*.tsx",
32 |     ".next/types/**/*.ts"
33 |   ],
34 |   "exclude": [
35 |     "node_modules"
36 |   ]
37 | }
38 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/label.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import * as LabelPrimitive from "@radix-ui/react-label"
 3 | import { cva, type VariantProps } from "class-variance-authority"
 4 | 
 5 | import { cn } from "@flashinfer-bench/utils"
 6 | 
 7 | const labelVariants = cva(
 8 |   "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
 9 | )
10 | 
11 | // Styled wrapper around the Radix label root; the ref is forwarded to that root element.
12 | const Label = React.forwardRef<
13 |   HTMLLabelElement,
14 |   React.ComponentPropsWithoutRef<typeof LabelPrimitive.Root> &
15 |     VariantProps<typeof labelVariants>
16 | >(({ className, ...rest }, ref) => {
17 |   // cn() receives the base label classes first, then the caller-supplied className.
18 |   const mergedClassName = cn(labelVariants(), className)
19 |   return <LabelPrimitive.Root ref={ref} className={mergedClassName} {...rest} />
20 | })
21 | // Reuse the primitive's displayName for the wrapper.
22 | Label.displayName = LabelPrimitive.Root.displayName
23 | 
24 | export { Label }
25 | 


--------------------------------------------------------------------------------
/web/apps/docs/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@flashinfer-bench/docs",
 3 |   "private": true,
 4 |   "version": "0.1.0",
 5 |   "scripts": {
 6 |     "dev": "next dev -p 3030",
 7 |     "build": "next build",
 8 |     "start": "next start -p 3030",
 9 |     "api-docs": "bash ./gen_api_docs.sh"
10 |   },
11 |   "dependencies": {
12 |     "@flashinfer-bench/config": "workspace:*",
13 |     "@flashinfer-bench/ui": "workspace:*",
14 |     "next": "^15.5.0",
15 |     "nextra": "^4.4.0",
16 |     "nextra-theme-docs": "^4.4.0",
17 |     "react": "^19.1.1",
18 |     "react-dom": "^19.1.1"
19 |   },
20 |   "devDependencies": {
21 |     "@tailwindcss/postcss": "^4.1.11",
22 |     "@types/react": "^19.1.13",
23 |     "@types/react-dom": "^19.1.9",
24 |     "postcss": "^8.5.6",
25 |     "tailwindcss": "^4.1.11",
26 |     "typescript": "^5.8.3"
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/docs/api/rst/schema.md:
--------------------------------------------------------------------------------
 1 | # FlashInfer Trace Schema
 2 | 
 3 | FlashInfer-Bench provides a schema for the FlashInfer Trace database. This document covers
 4 | the Python API for the schema, including:
 5 | 
 6 | - The {py:class}`~flashinfer_bench.data.Definition` class, which defines the kernel specification.
 7 | - The {py:class}`~flashinfer_bench.data.Solution` class, which defines the kernel implementation.
 8 | - The {py:class}`~flashinfer_bench.data.Workload` class, which defines the kernel's input tensors.
 9 | - The {py:class}`~flashinfer_bench.data.Trace` class, which defines the kernel execution trace.
10 | - The {py:class}`~flashinfer_bench.data.TraceSet` class, which defines a set of kernel execution traces.
11 | 
12 | ```{toctree}
13 | :maxdepth: 2
14 | 
15 | schema_definition
16 | schema_solution
17 | schema_workload
18 | schema_trace
19 | schema_traceset
20 | ```
21 | 


--------------------------------------------------------------------------------
/flashinfer_bench/tracing/workload_entry.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import Any, Dict, Optional
 3 | 
 4 | 
 5 | @dataclass
 6 | class WorkloadEntry:
 7 |     """In-memory buffer entry for collected workloads."""
 8 | 
 9 |     def_name: str
10 |     """Name of the definition this workload entry belongs to."""
11 | 
12 |     axes: Dict[str, int]
13 |     """Dictionary mapping axis names to their concrete integer values."""
14 | 
15 |     inputs_to_dump: Dict[str, Any]
16 |     """Inputs to dump. Maps input name to the tensor to dump. This field will be further stored
17 |     to disk as a tensor blob."""
18 | 
19 |     order: int
20 |     """Sequential order number for this entry in the collection process."""
21 | 
22 |     cuda_graph_snapshot: Optional[Dict[str, Any]] = None
23 |     """CPU snapshot of tensors collected during CUDA Graph replay, if applicable."""
24 | 


--------------------------------------------------------------------------------
/web/apps/web/app/kernels/[name]/constraints.tsx:
--------------------------------------------------------------------------------
 1 | import { Card, CardContent } from "@flashinfer-bench/ui"
 2 | import { Definition } from "@/lib/schemas"
 3 | 
 4 | export function ConstraintsSection({ definition }: { definition: Definition }) {
 5 |   const { constraints } = definition
 6 |   if (!constraints || constraints.length === 0) return null // missing or empty: render nothing
 7 |   const items = constraints.map((constraint, idx) => (
 8 |     <li key={idx} className="text-sm font-mono text-muted-foreground">
 9 |       • {constraint}
10 |     </li>
11 |   ))
12 |   return (
13 |     <section id="constraints">
14 |       <h2 className="text-2xl font-semibold mb-4">Constraints</h2>
15 |       <Card>
16 |         <CardContent className="pt-6">
17 |           <ul className="space-y-2">{items}</ul>
18 |         </CardContent>
19 |       </Card>
20 |     </section>
21 |   )
22 | }
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ### Python ###
 2 | # Byte-compiled / optimized / DLL files
 3 | __pycache__/
 4 | *.py[cod]
 5 | *$py.class
 6 | 
 7 | # C extensions
 8 | *.so
 9 | 
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | _build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 | 
31 | # PyInstaller
32 | #  Usually these files are written by a python script from a template
33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 | 
37 | # IMPORTANT: Include web project's lib directories
38 | !web/**/lib/
39 | !web/**/lib/**
40 | 
41 | # Sphinx
42 | .sphinx-deps
43 | web/**/public
44 | 
45 | .claude
46 | .vscode
47 | 
48 | tmp/
49 | AGENTS.md
50 | .coverage
51 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/evaluators/registry.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import List, Type
 4 | 
 5 | from flashinfer_bench.data import Definition
 6 | 
 7 | from .default import DefaultEvaluator
 8 | from .evaluator import Evaluator
 9 | from .lowbit import LowBitEvaluator
10 | from .sampling import SamplingEvaluator
11 | 
12 | EvaluatorType = Type[Evaluator]
13 | 
14 | _EVALUATORS: List[EvaluatorType] = [SamplingEvaluator, LowBitEvaluator]
15 | _DEFAULT_EVALUATOR: EvaluatorType = DefaultEvaluator
16 | 
17 | 
18 | def resolve_evaluator(definition: Definition) -> EvaluatorType:
19 |     matches = [cls for cls in _EVALUATORS if cls.can_evaluate(definition)]
20 |     if len(matches) == 1:
21 |         return matches[0]
22 |     if len(matches) == 0:
23 |         return _DEFAULT_EVALUATOR
24 |     raise ValueError(f"Multiple evaluator matches for definition '{definition.name}'")
25 | 


--------------------------------------------------------------------------------
/web/apps/docs/app/[[...mdxPath]]/page.jsx:
--------------------------------------------------------------------------------
 1 | import { generateStaticParamsFor, importPage } from 'nextra/pages'
 2 | import { useMDXComponents as getMDXComponents } from '../../mdx-components'
 3 | 
 4 | export const generateStaticParams = generateStaticParamsFor('mdxPath')
 5 | 
 6 | export async function generateMetadata(props) {
 7 |   const params = await props.params
 8 |   const { metadata } = await importPage(params.mdxPath)
 9 |   return metadata
10 | }
11 | 
12 | const Wrapper = getMDXComponents().wrapper
13 | 
14 | export default async function Page(props) {
15 |   const params = await props.params
16 |   const {
17 |     default: MDXContent,
18 |     toc,
19 |     metadata,
20 |     sourceCode
21 |   } = await importPage(params.mdxPath)
22 |   return (
23 |     <Wrapper toc={toc} metadata={metadata} sourceCode={sourceCode}>
24 |       <MDXContent {...props} params={params} />
25 |     </Wrapper>
26 |   )
27 | }
28 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/textarea.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | 
 3 | import { cn } from "@flashinfer-bench/utils"
 4 | 
 5 | export interface TextareaProps
 6 |   extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {}
 7 | 
 8 | // Native <textarea> with shared field styling; the ref and all remaining props pass through.
 9 | const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
10 |   ({ className, ...rest }, ref) => {
11 |     const baseClasses =
12 |       "flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-hidden focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
13 |     return (
14 |       <textarea
15 |         className={cn(baseClasses, className)}
16 |         ref={ref}
17 |         {...rest}
18 |       />
19 |     )
20 |   }
21 | )
22 | Textarea.displayName = "Textarea"
23 | 
24 | export { Textarea }
25 | 


--------------------------------------------------------------------------------
/flashinfer_bench/compile/__init__.py:
--------------------------------------------------------------------------------
 1 | """Compiler subsystem package.
 2 | 
 3 | This package provides the infrastructure for building solutions into executable runnables.
 4 | It includes:
 5 | - Builder: Abstract base class for different language/build system implementations
 6 | - BuilderRegistry: Central registry for managing and dispatching builders
 7 | - Runnable: Executable wrapper around compiled solutions
 8 | - RunnableMetadata: Metadata about build process and source
 9 | 
10 | The typical workflow is:
11 | 1. Get the singleton registry: registry = BuilderRegistry.get_instance()
12 | 2. Build a solution: runnable = registry.build(definition, solution)
13 | 3. Execute: result = runnable(**inputs)
14 | """
15 | 
16 | from .builder import Builder, BuildError
17 | from .registry import BuilderRegistry
18 | from .runnable import Runnable, RunnableMetadata
19 | 
20 | __all__ = ["Builder", "BuildError", "BuilderRegistry", "Runnable", "RunnableMetadata"]
21 | 


--------------------------------------------------------------------------------
/web/apps/web/lib/utils.ts:
--------------------------------------------------------------------------------
 1 | // Keep web-only helpers here. Shared `cn` moved to @flashinfer-bench/utils
 2 | 
 3 | export function formatBytes(bytes: number, decimals = 2) {
 4 |   if (bytes === 0 || Number.isNaN(bytes)) return "0 Bytes" // no magnitude to pick a unit from
 5 |   const k = 1024
 6 |   const dm = decimals < 0 ? 0 : decimals
 7 |   const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
 8 |   // Unit index comes from |bytes| and is clamped to the table: log() is NaN for negatives,
 9 |   // below 0 for |bytes| < 1, and past the end beyond YB -- all used to print "undefined".
10 |   const exponent = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k))
11 |   const i = Math.min(Math.max(exponent, 0), sizes.length - 1)
12 |   return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + " " + sizes[i]
13 | }
14 | 
15 | export function formatDate(date: string | Date) {
16 |   // en-US long form: full month name, numeric day, numeric year.
17 |   const formatter = new Intl.DateTimeFormat("en-US", {
18 |     month: "long", day: "numeric", year: "numeric",
19 |   })
20 |   return formatter.format(new Date(date))
21 | }
22 | 
23 | export function generateBranchName(type: string, trace: string) {
24 |   // Suffix = millisecond timestamp plus up to six base-36 characters from Math.random().
25 |   const suffix = `${Date.now()}-${Math.random().toString(36).substring(2, 8)}`
26 |   return `${type}/${trace}/${suffix}`
27 | }
28 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/input.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | 
 3 | import { cn } from "@flashinfer-bench/utils"
 4 | 
 5 | export interface InputProps
 6 |   extends React.InputHTMLAttributes<HTMLInputElement> {}
 7 | 
 8 | // Native <input> with shared field styling; `type`, the ref and all other props pass through.
 9 | const Input = React.forwardRef<HTMLInputElement, InputProps>(
10 |   ({ className, type, ...rest }, ref) => {
11 |     const baseClasses =
12 |       "flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-hidden focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
13 |     return (
14 |       <input
15 |         type={type}
16 |         className={cn(baseClasses, className)}
17 |         ref={ref}
18 |         {...rest}
19 |       />
20 |     )
21 |   }
22 | )
23 | Input.displayName = "Input"
24 | 
25 | export { Input }
26 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/gqa_ragged.md:
--------------------------------------------------------------------------------
 1 | # gqa_ragged
 2 | 
 3 | Grouped Query Attention (GQA) with ragged (variable-length) tensor layout. This variant efficiently handles batches of sequences with different lengths by using ragged tensors, eliminating the need for padding and improving memory efficiency for variable-length inputs.
 4 | 
 5 | ## prefill
 6 | 
 7 | Axes (6 dimensions):
 8 | - `total_q`, `total_kv`, `len_indptr`: variable
 9 | - `num_qo_heads`, `num_kv_heads`, `head_dim`: constant
10 | 
11 | Inputs (5 tensors + 1 scalar):
12 | - `q`: query tensor [total_q, num_qo_heads, head_dim]
13 | - `k`, `v`: key-value tensors [total_kv, num_kv_heads, head_dim]
14 | - `qo_indptr`, `kv_indptr`: sequence offsets
15 | - `sm_scale`: softmax scale (scalar)
16 | 
17 | Outputs (2 tensors):
18 | - `output`: attention output [total_q, num_qo_heads, head_dim]
19 | - `lse`: log-sum-exp values [total_q, num_qo_heads]
20 | 
21 | Constraints:
22 | - `total_q == qo_indptr[-1]`
23 | - `total_kv == kv_indptr[-1]`
24 | 


--------------------------------------------------------------------------------
/web/apps/web/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "es2017",
 4 |     "lib": ["dom", "dom.iterable", "es2017"],
 5 |     "allowJs": true,
 6 |     "skipLibCheck": true,
 7 |     "strict": true,
 8 |     "noEmit": true,
 9 |     "esModuleInterop": true,
10 |     "module": "esnext",
11 |     "moduleResolution": "bundler",
12 |     "resolveJsonModule": true,
13 |     "isolatedModules": true,
14 |     "jsx": "preserve",
15 |     "incremental": true,
16 |     "plugins": [
17 |       {
18 |         "name": "next"
19 |       }
20 |     ],
21 |     "baseUrl": ".",
22 |     "paths": {
23 |       "@/*": ["./*"],
24 |       "@flashinfer-bench/ui": ["../../packages/ui/src/index.ts"],
25 |       "@flashinfer-bench/utils": ["../../packages/utils/src/index.ts"],
26 |       "@flashinfer-bench/config": ["../../packages/config/src/index.ts"]
27 |     }
28 |   },
29 |   "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts", "middleware.ts"],
30 |   "exclude": ["node_modules"]
31 | }
32 | 


--------------------------------------------------------------------------------
/.github/workflows/unit_test.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   workflow_dispatch:
 3 |   pull_request:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   run_unit_test:
10 |     name: Run unit tests on ubuntu-latest and Python ${{ matrix.python }}
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       fail-fast: false
14 |       matrix:
15 |         python: ['3.10', '3.11', '3.12', '3.13']
16 | 
17 |     steps:
18 |       - name: Checkout
19 |         uses: actions/checkout@v4
20 |         with:
21 |           submodules: recursive
22 | 
23 |       - name: Set up uv
24 |         uses: astral-sh/setup-uv@v7.1.0
25 |         with:
26 |           enable-cache: true
27 |           cache-dependency-glob: pyproject.toml
28 | 
29 |       - name: Install Python ${{ matrix.python }}
30 |         run: uv python install ${{ matrix.python }}
31 | 
32 |       - name: Install dependencies
33 |         run: uv sync --python ${{ matrix.python }} --extra dev
34 | 
35 |       - name: Run Python tests
36 |         run: uv run --python ${{ matrix.python }} pytest
37 | 


--------------------------------------------------------------------------------
/web/apps/web/lib/schemas/model.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod"
 2 | 
 3 | export const ModuleTypeSchema = z.enum(["block", "layer"])
 4 | 
 5 | export const ModuleSchema = z.object({
 6 |   count: z.number(),
 7 |   parent: z.string().optional(),
 8 |   type: ModuleTypeSchema,
 9 |   definitions: z.array(z.string()).optional(),
10 | })
11 | 
12 | export const ModelSchema = z.object({
13 |   id: z.string(),
14 |   name: z.string(),
15 |   description: z.string().optional(),
16 |   modules: z.record(z.string(), ModuleSchema),
17 | })
18 | 
19 | export const ModelHierarchySchema: z.ZodType<{
20 |   name: string
21 |   type: z.infer<typeof ModuleTypeSchema>
22 |   definitions?: string[]
23 |   children?: any[]
24 | }> = z.object({
25 |   name: z.string(),
26 |   type: ModuleTypeSchema,
27 |   definitions: z.array(z.string()).optional(),
28 |   children: z.array(z.lazy(() => ModelHierarchySchema)).optional(),
29 | })
30 | export type Module = z.infer<typeof ModuleSchema>
31 | export type Model = z.infer<typeof ModelSchema>
32 | export type ModelHierarchy = z.infer<typeof ModelHierarchySchema>
33 | export type ModuleType = z.infer<typeof ModuleTypeSchema>
34 | 


--------------------------------------------------------------------------------
/flashinfer_bench/integration/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import inspect
 4 | from typing import Any, Dict, Mapping, Tuple
 5 | from weakref import WeakKeyDictionary
 6 | 
 7 | 
 8 | class ArgBinder:
 9 |     """Cache inspect.signature and bind once per callable."""
10 | 
11 |     def __init__(self, fn) -> None:
12 |         self._sig = inspect.signature(fn)
13 | 
14 |     @classmethod
15 |     def from_callable(cls, fn) -> "ArgBinder":
16 |         return cls(fn)
17 | 
18 |     def bind(self, args: Tuple[Any, ...], kwargs: Mapping[str, Any]) -> Dict[str, Any]:
19 |         ba = self._sig.bind_partial(*args, **kwargs)
20 |         ba.apply_defaults()
21 |         return dict(ba.arguments)
22 | 
23 | 
24 | class ContextStore:
25 |     """Per-instance loose store; adapter decides fields."""
26 | 
27 |     def __init__(self) -> None:
28 |         self._store: "WeakKeyDictionary[object, Dict[str, Any]]" = WeakKeyDictionary()
29 | 
30 |     def get(self, inst: object) -> Dict[str, Any]:
31 |         d = self._store.get(inst)
32 |         if d is None:
33 |             d = {}
34 |             self._store[inst] = d
35 |         return d
36 | 


--------------------------------------------------------------------------------
/flashinfer_bench/tracing/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from .builtin.policies import (
 4 |     BUILTIN_FILTER_POLICIES,
 5 |     BUILTIN_INPUT_DUMP_POLICIES,
 6 |     AttentionFilterPolicy,
 7 |     KeepAllPolicy,
 8 |     KeepFirstByAxesPolicy,
 9 |     KeepFirstKPolicy,
10 |     dump_all,
11 |     dump_int32,
12 |     dump_none,
13 | )
14 | from .config import FilterPolicy, FilterPolicyFactory, InputDumpPolicyFunction, TracingConfig
15 | from .runtime import TracingRuntime, get_tracing_runtime
16 | from .tracing import disable_tracing, enable_tracing
17 | from .workload_entry import WorkloadEntry
18 | 
19 | __all__ = [
20 |     "disable_tracing",
21 |     "enable_tracing",
22 |     "get_tracing_runtime",
23 |     "TracingRuntime",
24 |     "TracingConfig",
25 |     "WorkloadEntry",
26 |     "FilterPolicy",
27 |     "FilterPolicyFactory",
28 |     "InputDumpPolicyFunction",
29 |     "BUILTIN_FILTER_POLICIES",
30 |     "KeepAllPolicy",
31 |     "KeepFirstKPolicy",
32 |     "KeepFirstByAxesPolicy",
33 |     "AttentionFilterPolicy",
34 |     "BUILTIN_INPUT_DUMP_POLICIES",
35 |     "dump_all",
36 |     "dump_none",
37 |     "dump_int32",
38 | ]
39 | 


--------------------------------------------------------------------------------
/web/apps/web/app/layout.tsx:
--------------------------------------------------------------------------------
 1 | import type { Metadata } from "next"
 2 | import { getDefaultMetadata } from "@flashinfer-bench/config"
 3 | import { Inter } from "next/font/google"
 4 | import "./globals.css"
 5 | import { Providers } from "@flashinfer-bench/ui"
 6 | import { Header } from "@/components/layout/header"
 7 | import { Footer } from "@/components/layout/footer"
 8 | import { Toaster } from "@flashinfer-bench/ui"
 9 | import { Analytics } from "@vercel/analytics/next"
10 | 
11 | 
12 | const inter = Inter({ subsets: ["latin"] })
13 | 
14 | export const metadata: Metadata = getDefaultMetadata() as Metadata
15 | 
/**
 * Root layout for the app: wraps every page in the shared providers and
 * renders the header, the page content, the footer, the toaster and analytics.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props

  // Full-height column: header on top, page content stretching, footer last.
  const pageShell = (
    <div className="relative flex min-h-screen flex-col">
      <Header />
      <main className="flex-1">{children}</main>
      <Footer />
    </div>
  )

  return (
    <html lang="en" suppressHydrationWarning>
      <body className={inter.className}>
        <Providers>
          {pageShell}
          <Toaster />
        </Providers>
        <Analytics />
      </body>
    </html>
  )
}
37 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/runner/runner.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import logging
 4 | from abc import ABC, abstractmethod
 5 | from dataclasses import dataclass
 6 | from pathlib import Path
 7 | from typing import Any, Dict, List
 8 | 
 9 | import torch
10 | 
11 | from flashinfer_bench.bench.config import BenchmarkConfig
12 | from flashinfer_bench.data import Definition, Evaluation, Solution, Workload
13 | 
14 | 
class RunnerError(RuntimeError):
    """Base exception type of this runner module (a ``RuntimeError`` subclass)."""
16 | 
17 | 
class RunnerFatalError(RunnerError):
    """``RunnerError`` subclass; by its name, marks failures considered fatal."""
19 | 
20 | 
class BaselineHandle(str):
    """Distinct ``str`` subtype used to label baseline handles; adds no behavior."""
23 | 
24 | 
@dataclass
class DeviceBaseline:
    """Record bundling a baseline handle with its definition, device, I/O and latency.

    NOTE(review): the per-field notes below are inferred from field names and
    type hints only -- confirm against the runner code that fills this record.
    """

    # Identifier of this baseline.
    handle: BaselineHandle
    # Definition the baseline belongs to.
    definition: Definition
    # Device string -- presumably a torch device such as "cuda:0"; TODO confirm.
    device: str
    # Nested list of inputs -- presumably one inner list per trial; TODO confirm.
    inputs: List[List[Any]]
    # Nested list of output tensors, parallel in shape to ``inputs`` (assumed).
    outputs: List[List[torch.Tensor]]
    # Mean latency in milliseconds, per the field name.
    mean_latency_ms: float
33 | 
34 | 
class Runner(ABC):
    """Abstract interface for benchmark runners.

    Subclasses must implement :meth:`run_workload`; the class cannot be
    instantiated until they do.
    """

    def __init__(self, logger: logging.Logger) -> None:
        """Accept a logger. This base implementation stores nothing."""

    @abstractmethod
    def run_workload(
        self,
        definition: Definition,
        workload: Workload,
        solutions: List[Solution],
        config: BenchmarkConfig,
        root: Path,
    ) -> Dict[str, Evaluation]:
        """Run ``solutions`` for one ``workload`` of ``definition``.

        Returns a mapping from string keys to ``Evaluation`` objects.
        NOTE(review): keys are presumably solution names -- confirm in the
        concrete runners.
        """
47 | 


--------------------------------------------------------------------------------
/flashinfer_bench/integration/flashinfer/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from flashinfer_bench.integration.patch_manager import get_manager
 4 | 
 5 | from .adapters.gqa_paged_decode import GQAPagedDecodeAdapter
 6 | from .adapters.gqa_paged_prefill import GQAPagedPrefillAdapter
 7 | from .adapters.mla_paged import MLAPagedAdapter
 8 | from .adapters.ragged_prefill import RaggedPrefillAdapter
 9 | from .adapters.rmsnorm import RMSNormAdapter
10 | 
11 | 
def install_flashinfer_integrations() -> None:
    """
    Install patches for a set of adapters. If an adapter cannot resolve its
    targets in the current environment, it is skipped (the reason is recorded
    at DEBUG level on the package logger). Idempotent.
    """
    # Imported here so this function stays self-contained; library code should
    # report through the package logger instead of printing to stdout.
    from flashinfer_bench.logging import get_logger

    logger = get_logger("integration.flashinfer")
    logger.info("Installing flashinfer integrations...")
    mgr = get_manager()

    adapters = [
        GQAPagedPrefillAdapter(),
        RaggedPrefillAdapter(),
        GQAPagedDecodeAdapter(),
        MLAPagedAdapter(),
        RMSNormAdapter(),
    ]

    for adp in adapters:
        try:
            targets = adp.targets()
        except Exception:
            # Best-effort by design: a failing adapter must not block the
            # others, but keep the traceback available for debugging.
            logger.debug(
                "Skipping %s: could not resolve patch targets",
                type(adp).__name__,
                exc_info=True,
            )
            continue
        for spec in targets:
            mgr.patch(spec, adp.make_wrapper)
35 | 
36 | 
37 | __all__ = ["install_flashinfer_integrations"]
38 | 


--------------------------------------------------------------------------------
/web/apps/web/app/models/page.tsx:
--------------------------------------------------------------------------------
 1 | import { ModelCard } from "@/components/model-card"
 2 | import { Layers } from "lucide-react"
 3 | import { getAllModels } from "@/lib/data-loader"
 4 | 
 5 | export default async function ModelsPage() {
 6 |   const models = await getAllModels()
 7 | 
 8 |   return (
 9 |     <div className="container py-8">
10 |       <div className="space-y-6">
11 |         <div>
12 |           <h1 className="text-3xl font-bold tracking-tight">Models</h1>
13 |           <p className="text-muted-foreground">
14 |             Explore model architectures and their kernel implementations
15 |           </p>
16 |         </div>
17 | 
18 |         <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-3">
19 |           {models.map((model) => (
20 |             <ModelCard
21 |               key={model.id}
22 |               model={model}
23 |               href={`/models/${model.id}?from=models`}
24 |             />
25 |           ))}
26 |         </div>
27 | 
28 |         {models.length === 0 && (
29 |           <div className="text-center py-12">
30 |             <Layers className="h-12 w-12 text-muted-foreground mx-auto mb-4" />
31 |             <p className="text-muted-foreground">No models found</p>
32 |           </div>
33 |         )}
34 |       </div>
35 |     </div>
36 |   )
37 | }
38 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/sampling.md:
--------------------------------------------------------------------------------
 1 | # sampling
 2 | 
 3 |  Token sampling operations for language model generation. These methods select the next token from a probability distribution, implementing various strategies to balance between diversity and quality in text generation by filtering and sampling from the model's output probabilities.
 4 | 
 5 | Variants:
 6 | - Top-k sampling: Keeps only the k highest probability tokens, renormalizes the distribution, then samples. Controls diversity by limiting the vocabulary size to the most likely tokens
 7 | - Top-p sampling: Filters tokens using cumulative probability threshold (nucleus sampling). Dynamically adjusts vocabulary size based on probability mass, maintaining diversity while avoiding low-probability tokens
 8 | - Top-k + Top-p sampling: Combines both filtering methods for fine-grained control over generation quality and diversity
 9 | 
10 | Axes (2 dimensions):
11 | - `batch_size`: variable
12 | - `vocab_size`: constant
13 | 
14 | Inputs (1 to 3 tensors):
15 | - `probs`: probability distributions after softmax [batch_size, vocab_size]
16 | - Sampling-specific parameters:
17 |   - `top_k`: for top-k sampling [batch_size]
18 |   - `top_p`: for top-p/nucleus sampling [batch_size]
19 | 
20 | Outputs (1 tensor):
21 | - `samples`: sampled token indices [batch_size]
22 | 


--------------------------------------------------------------------------------
/examples/kernel_generator/README.md:
--------------------------------------------------------------------------------
 1 | # Kernel Generator
 2 | 
 3 | A multi-turn kernel generating agent that uses FlashInfer-Bench for evaluation feedback. It can conduct sequential multi-turn generation and beam search kernel exploration.
 4 | 
 5 | ## Usage
 6 | 
 7 | 1. **Configure generation settings** in `kernel_generator_example.py`:
 8 |    - Set `model_name` (e.g., `"gpt-5-2025-08-07"`)
 9 |    - Set `language` (`"cuda"` or `"triton"`, will support more in the future)
10 |    - Set `target_gpu` (e.g., `"B200"`, `"H100"`, `"A100"`)
11 |    - Optionally set `definition` to target a specific kernel (leave empty to generate all definitions in the traceset)
12 | 
13 | 2. **Set traceset path**:
14 |    - Update `traceset_path` to your flashinfer-trace dataset directory
15 | 
16 | 3. **Enable beam search** (optional):
17 |    - Uncomment lines 97-98 to use beam search mode
18 | 
19 | 4. **Set API credentials**:
20 |    - Create a `.env` file based on `.env.example`:
21 |      ```
22 |      LLM_API_KEY=your_api_key
23 |      BASE_URL=your_base_url  # Optional, for non-OpenAI APIs
24 |      ```
25 | 
26 | 5. **Run the generator**:
27 |    ```bash
28 |    python kernel_generator_example.py
29 |    ```
30 | 
31 | Generated solutions are saved to `{traceset_path}/solutions/{op_type}/{definition_name}/{solution_name}.json`
32 | 


--------------------------------------------------------------------------------
/tests/integration/test_utils.py:
--------------------------------------------------------------------------------
 1 | from flashinfer_bench.integration.utils import ArgBinder, ContextStore
 2 | 
 3 | 
 4 | def test_arg_binder_bind_with_defaults_and_kwargs():
 5 |     def fn(a, b=2, *, c=3):
 6 |         return a + b + c
 7 | 
 8 |     binder = ArgBinder.from_callable(fn)
 9 |     bound = binder.bind(args=(1,), kwargs={"c": 10})
10 |     # Ensure arguments resolved with defaults applied
11 |     assert bound == {"a": 1, "b": 2, "c": 10}
12 | 
13 | 
def test_arg_binder_bind_method_like_signature():
    """Binding against a function taken from the class keeps ``self`` as a parameter."""

    class C:
        def m(self, x, y=5):
            return x + y

    instance = C()
    # ``C.m`` is the plain function, so the instance is passed as the first
    # positional argument, exactly as a wrapped method call would pass it.
    resolved = ArgBinder.from_callable(C.m).bind(args=(instance, 7), kwargs={})

    assert resolved["x"] == 7
    assert resolved["y"] == 5
    assert resolved.get("self") is instance
26 | 
27 | 
def test_context_store_per_instance_isolated_and_mutable():
    """Each instance gets its own dict, and writes to it persist across lookups."""

    class A:
        pass

    store = ContextStore()
    first, second = A(), A()

    first_ctx = store.get(first)
    second_ctx = store.get(second)
    assert first_ctx is not second_ctx
    assert first_ctx == {}
    assert second_ctx == {}

    # Mutating one instance's dict is visible on the next lookup for that
    # instance and leaves the other instance's dict untouched.
    first_ctx["x"] = 42
    assert store.get(first)["x"] == 42
    assert "x" not in store.get(second)
43 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/hover-card.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import * as React from "react"
 4 | import * as HoverCardPrimitive from "@radix-ui/react-hover-card"
 5 | 
 6 | import { cn } from "@flashinfer-bench/utils"
 7 | 
 8 | const HoverCard = HoverCardPrimitive.Root
 9 | 
10 | const HoverCardTrigger = HoverCardPrimitive.Trigger
11 | 
/**
 * Styled wrapper around the Radix hover-card content element.
 * Defaults: `align="center"`, `sideOffset={4}`. Caller classes are merged
 * after the base classes via `cn`.
 */
const HoverCardContent = React.forwardRef<
  React.ElementRef<typeof HoverCardPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof HoverCardPrimitive.Content>
>((allProps, forwardedRef) => {
  const { className, align = "center", sideOffset = 4, ...rest } = allProps
  const classes = cn(
    "z-50 w-64 rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-none data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
    className
  )

  return (
    <HoverCardPrimitive.Content
      ref={forwardedRef}
      align={align}
      sideOffset={sideOffset}
      className={classes}
      {...rest}
    />
  )
})
HoverCardContent.displayName = HoverCardPrimitive.Content.displayName
28 | 
29 | export { HoverCard, HoverCardTrigger, HoverCardContent }
30 | 


--------------------------------------------------------------------------------
/docs/api/rst/compile.md:
--------------------------------------------------------------------------------
 1 | # flashinfer_bench.compile
 2 | 
 3 | `flashinfer_bench.compile` provides infrastructure for building solutions into executable runnables.
 4 | 
 5 | The typical workflow is:
 6 | 
 7 | 1. Get the singleton registry: `registry = BuilderRegistry.get_instance()`
 8 | 2. Build a solution: `runnable = registry.build(definition, solution)`
 9 | 3. Execute: `result = runnable(**inputs)`
10 | 
11 | ## Registry
12 | 
13 | ```{eval-rst}
14 | .. currentmodule:: flashinfer_bench.compile
15 | 
16 | .. autoclass:: BuilderRegistry
17 |    :members:
18 | ```
19 | 
20 | ## Builder
21 | 
22 | ```{eval-rst}
23 | .. autoclass:: Builder
24 |    :members:
25 | 
26 | .. autoexception:: BuildError
27 | ```
28 | 
29 | ## Runnable
30 | 
31 | ```{eval-rst}
32 | .. autoclass:: Runnable
33 |    :members:
34 | 
35 | .. autoclass:: RunnableMetadata
36 |    :members:
37 | ```
38 | 
39 | ## Concrete Builders
40 | 
41 | ```{eval-rst}
42 | .. autoclass:: flashinfer_bench.compile.builders.PythonBuilder
43 |    :members:
44 |    :show-inheritance:
45 | 
46 | .. autoclass:: flashinfer_bench.compile.builders.TritonBuilder
47 |    :members:
48 |    :show-inheritance:
49 | 
50 | .. autoclass:: flashinfer_bench.compile.builders.TVMFFIBuilder
51 |    :members:
52 |    :show-inheritance:
53 | 
54 | .. autoclass:: flashinfer_bench.compile.builders.TorchBuilder
55 |    :members:
56 |    :show-inheritance:
57 | ```
58 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/badge.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { cva, type VariantProps } from "class-variance-authority"
 3 | 
 4 | import { cn } from "@flashinfer-bench/utils"
 5 | 
 6 | const badgeVariants = cva(
 7 |   "inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-hidden focus:ring-2 focus:ring-ring focus:ring-offset-2",
 8 |   {
 9 |     variants: {
10 |       variant: {
11 |         default:
12 |           "border-transparent bg-primary text-primary-foreground hover:bg-primary/80",
13 |         secondary:
14 |           "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
15 |         destructive:
16 |           "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
17 |         outline: "text-foreground",
18 |       },
19 |     },
20 |     defaultVariants: {
21 |       variant: "default",
22 |     },
23 |   }
24 | )
25 | 
26 | export type BadgeProps = React.ComponentPropsWithoutRef<"div"> &
27 |   VariantProps<typeof badgeVariants>
28 | 
/**
 * Badge rendered as a `div`. The `variant` prop selects the class set from
 * `badgeVariants`; caller classes are merged after it. All remaining props
 * and the ref are forwarded to the `div`.
 */
const Badge = React.forwardRef<HTMLDivElement, BadgeProps>(
  (allProps, forwardedRef) => {
    const { className, variant, ...divProps } = allProps
    const classes = cn(badgeVariants({ variant }), className)

    return <div ref={forwardedRef} className={classes} {...divProps} />
  }
)
Badge.displayName = "Badge"
39 | 
40 | export { Badge, badgeVariants }
41 | 


--------------------------------------------------------------------------------
/flashinfer_bench/data/__init__.py:
--------------------------------------------------------------------------------
 1 | """Data layer with strongly-typed dataclasses for FlashInfer Bench."""
 2 | 
 3 | from .definition import AxisConst, AxisSpec, AxisVar, Definition, TensorSpec
 4 | from .json_utils import (
 5 |     append_jsonl_file,
 6 |     load_json_file,
 7 |     load_jsonl_file,
 8 |     save_json_file,
 9 |     save_jsonl_file,
10 | )
11 | from .solution import BuildSpec, Solution, SourceFile, SupportedLanguages
12 | from .trace import Correctness, Environment, Evaluation, EvaluationStatus, Performance, Trace
13 | from .trace_set import TraceSet
14 | from .workload import InputSpec, RandomInput, SafetensorsInput, ScalarInput, Workload
15 | 
16 | __all__ = [
17 |     # Definition types
18 |     "AxisConst",
19 |     "AxisSpec",
20 |     "AxisVar",
21 |     "TensorSpec",
22 |     "Definition",
23 |     # Solution types
24 |     "SourceFile",
25 |     "BuildSpec",
26 |     "SupportedLanguages",
27 |     "Solution",
28 |     # Workload types
29 |     "RandomInput",
30 |     "ScalarInput",
31 |     "SafetensorsInput",
32 |     "InputSpec",
33 |     "Workload",
34 |     # Trace types
35 |     "Correctness",
36 |     "Performance",
37 |     "Environment",
38 |     "Evaluation",
39 |     "EvaluationStatus",
40 |     "Trace",
41 |     # TraceSet
42 |     "TraceSet",
43 |     # JSON functions
44 |     "save_json_file",
45 |     "load_json_file",
46 |     "save_jsonl_file",
47 |     "load_jsonl_file",
48 |     "append_jsonl_file",
49 | ]
50 | 


--------------------------------------------------------------------------------
/docs/index.mdx:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: FlashInfer Bench
 3 | description: Benchmarking and infrastructure for GPU kernel optimization.
 4 | ---
 5 | 
 6 | # FlashInfer Bench
 7 | 
 8 | FlashInfer-Bench is a comprehensive benchmark and infrastructure designed to create a "virtuous cycle" where AI can automatically optimize and improve the core GPU kernels of the AI systems it runs on. It provides a systematic framework to identify performance bottlenecks, generate solutions, and deploy them immediately into production.
 9 | 
10 | - Standardized Schema: Introduces "FlashInfer Trace," a standardized format to describe GPU kernel workloads, solutions, and results.
11 | - Real-World Benchmarks: Datasets are curated from production-grade LLM serving traffic.
12 | - Seamless Deployment: Enables immediate integration of high-performance kernels into live LLM engines.
13 | - Performance Tracking: A public leaderboard visualizes and ranks kernel performance.
14 | 
15 | Useful links:
16 | 
17 | - FlashInfer Blog: https://flashinfer.ai/
18 | - GitHub: https://github.com/flashinfer-ai/flashinfer-bench/
19 | - Join Slack (#flashinfer-bench channel): https://join.slack.com/t/flashinfer/shared_invite/zt-379wct3hc-D5jR~1ZKQcU00WHsXhgvtA
20 | 
21 | ## Next Steps
22 | 
23 | - Get Started: ./start/quick_start
24 | - Installation: ./start/installation
25 | - Schema (FlashInfer Trace): ./flashinfer_trace/flashinfer_trace
26 | - Tutorials: ./tutorials/bring_your_own_kernel
27 | - API Reference: ./api/reference
28 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | def _torch_cuda_available() -> bool:
 8 |     """Check if CUDA is available from PyTorch.
 9 | 
10 |     Returns
11 |     -------
12 |     bool
13 |         True if CUDA is available from PyTorch, False otherwise.
14 |     """
15 |     try:
16 |         import torch
17 | 
18 |         return torch.cuda.is_available()
19 |     except ImportError:
20 |         return False
21 | 
22 | 
def pytest_collection_modifyitems(config: pytest.Config, items: List[pytest.Item]) -> None:
    """Pytest hook: mark ``requires_torch_cuda`` tests as skipped when CUDA is unavailable.

    Does nothing when CUDA is available. ``config`` is not used.
    """
    if not _torch_cuda_available():
        skip_cuda = pytest.mark.skip(reason="CUDA not available from PyTorch, skip test")
        for item in items:
            # Leave tests without the marker untouched.
            if not any(item.iter_markers(name="requires_torch_cuda")):
                continue
            item.add_marker(skip_cuda)
32 | 
33 | 
@pytest.fixture
def tmp_cache_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Provide an isolated cache directory and point ``FIB_CACHE_PATH`` at it.

    Creates ``<tmp_path>/cache`` and sets the ``FIB_CACHE_PATH`` environment
    variable to it through ``monkeypatch``. This applies to tests that request
    this fixture; it is not declared ``autouse``.

    Returns the created directory.
    """
    isolated_dir = tmp_path.joinpath("cache")
    isolated_dir.mkdir(parents=True, exist_ok=True)
    env_value = str(isolated_dir)
    monkeypatch.setenv("FIB_CACHE_PATH", env_value)
    return isolated_dir
45 | 


--------------------------------------------------------------------------------
/flashinfer_bench/apply/config.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import Literal
 3 | 
 4 | 
 5 | @dataclass
 6 | class ApplyConfig:
 7 |     # The maximum absolute difference allowed between the reference and the candidate
 8 |     max_atol: float = 1e-2
 9 |     # The maximum relative difference allowed between the reference and the candidate
10 |     max_rtol: float = 1e-5
11 |     # The ratio of the top solutions to AOT build for each definition
12 |     aot_ratio: float = 1.0
13 |     # The policy when a runtime ApplyKey misses the table
14 |     on_miss_policy: Literal["fallback_only", "use_def_best"] = "fallback_only"
15 | 
16 |     def __post_init__(self) -> None:
17 |         if not isinstance(self.max_atol, float):
18 |             raise ValueError("max_atol must be a float")
19 |         if not isinstance(self.max_rtol, float):
20 |             raise ValueError("max_rtol must be a float")
21 |         if not isinstance(self.aot_ratio, float):
22 |             raise ValueError("aot_ratio must be a float")
23 | 
24 |         if self.aot_ratio < 0 or self.aot_ratio > 1:
25 |             raise ValueError("aot_ratio must be between 0 and 1")
26 |         if self.on_miss_policy not in ["fallback_only", "use_def_best"]:
27 |             raise ValueError("on_miss_policy must be either 'fallback_only' or 'use_def_best'")
28 |         if self.max_atol <= 0:
29 |             raise ValueError("max_atol must be positive")
30 |         if self.max_rtol <= 0:
31 |             raise ValueError("max_rtol must be positive")
32 | 


--------------------------------------------------------------------------------
/.github/workflows/build-and-upload-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Build and upload to PyPI
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   pull_request:
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |   release:
10 |     types:
11 |       - published
12 | 
13 | jobs:
14 |   build:
15 |     name: Build distribution
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |         with:
20 |           fetch-depth: 0
21 | 
22 |       - uses: actions/setup-python@v5
23 |         with:
24 |           python-version: '3.11'
25 | 
26 |       - name: Build
27 |         run: |
28 |           pipx run build
29 |           pipx run twine check dist/*
30 | 
31 |       - uses: actions/upload-artifact@v4
32 |         with:
33 |           name: dist
34 |           path: dist/*
35 | 
36 |   upload_pypi:
37 |     needs: build
38 |     runs-on: ubuntu-latest
39 |     environment: pypi
40 |     permissions:
41 |       id-token: write
42 |       attestations: write
43 |     if: github.event_name == 'release' && github.event.action == 'published'
44 |     steps:
45 |       - uses: actions/download-artifact@v4
46 |         with:
47 |           name: dist
48 |           path: dist
49 | 
50 |       - name: Generate artifact attestation for sdist and wheel
51 |         uses: actions/attest-build-provenance@v1
52 |         with:
53 |           subject-path: dist/*
54 | 
55 |       - name: Publish package distributions to PyPI
56 |         uses: pypa/gh-action-pypi-publish@release/v1
57 |         with:
58 |           attestations: true
59 |           verbose: true
60 | 


--------------------------------------------------------------------------------
/flashinfer_bench/logging.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Optional, Union
 3 | 
 4 | _PACKAGE_LOGGER_NAME = "flashinfer-bench"
 5 | 
 6 | logging.getLogger(_PACKAGE_LOGGER_NAME).addHandler(logging.NullHandler())
 7 | 
 8 | 
 9 | def get_logger(name: Optional[str] = None) -> logging.Logger:
10 |     """Return a logger namespaced under the package root."""
11 |     full_name = _PACKAGE_LOGGER_NAME if not name else f"{_PACKAGE_LOGGER_NAME}.{name}"
12 |     return logging.getLogger(full_name)
13 | 
14 | 
def configure_logging(
    level: Union[int, str] = "INFO",
    *,
    handler: Optional[logging.Handler] = None,
    formatter: Optional[logging.Formatter] = None,
    propagate: bool = False,
) -> logging.Logger:
    """Configure the root package logger and return it.

    Parameters
    ----------
    level
        Numeric level, or a level name (case-insensitive) such as ``"debug"``.
    handler
        Handler to install. Defaults to a new ``logging.StreamHandler``.
    formatter
        Formatter to attach to the handler. When omitted, the handler keeps
        any formatter it already has; a default timestamped formatter is
        attached only if it has none.
    propagate
        Value assigned to the logger's ``propagate`` attribute.

    Returns
    -------
    logging.Logger
        The package logger, now holding exactly one handler.

    Raises
    ------
    ValueError
        If ``level`` is a string that is not a known level name.
    """
    logger = logging.getLogger(_PACKAGE_LOGGER_NAME)

    if isinstance(level, str):
        numeric_level = logging.getLevelName(level.upper())
        # For an unknown name getLevelName returns a string ("Level X"), not an int.
        if isinstance(numeric_level, str):
            raise ValueError(f"Unknown log level: {level}")
        level = numeric_level

    logger.setLevel(level)

    if handler is None:
        handler = logging.StreamHandler()
    if formatter is not None:
        handler.setFormatter(formatter)
    elif handler.formatter is None:
        # Apply the default only when there is nothing to overwrite, so a
        # caller-supplied handler keeps the formatter it was configured with.
        handler.setFormatter(
            logging.Formatter(
                fmt="[%(asctime)s] %(levelname)s %(name)s: %(message)s", datefmt="%H:%M:%S"
            )
        )

    # Detach previous handlers through the public API rather than mutating the
    # list in place. They are not closed because the caller may still own them.
    for existing in list(logger.handlers):
        logger.removeHandler(existing)
    logger.addHandler(handler)
    logger.propagate = propagate

    return logger
46 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # To use:
 2 | #
 3 | #   pre-commit run -a
 4 | #
 5 | # Or:
 6 | #
 7 | #   pre-commit install  # (runs every time you commit in git)
 8 | #
 9 | # To update this file:
10 | #
11 | #   pre-commit autoupdate
12 | #
13 | # See https://github.com/pre-commit/pre-commit
14 | 
15 | # Note: the pre-commit hooks should be used only for formatting, not for linting.
16 | # For linting consider using CI.
17 | 
18 | repos:
19 |   # Standard hooks
20 |   - repo: https://github.com/pre-commit/pre-commit-hooks
21 |     rev: v5.0.0
22 |     hooks:
23 |       - id: check-added-large-files
24 |       - id: check-case-conflict
25 |       - id: check-merge-conflict
26 |       - id: check-symlinks
27 |       - id: end-of-file-fixer
28 |       - id: mixed-line-ending
29 |       - id: requirements-txt-fixer
30 |       - id: trailing-whitespace
31 | 
32 |   # Changes tabs to spaces
33 |   - repo: https://github.com/Lucas-C/pre-commit-hooks
34 |     rev: v1.5.5
35 |     hooks:
36 |       - id: remove-tabs
37 |       - id: remove-crlf
38 | 
39 |   # Formatters
40 |   - repo: https://github.com/psf/black-pre-commit-mirror
41 |     rev: 25.1.0
42 |     hooks:
43 |       - id: black
44 | 
45 |   - repo: https://github.com/pycqa/isort
46 |     rev: 6.0.1
47 |     hooks:
48 |       - id: isort
49 |         args: ["--profile=black"]
50 | 
51 |   - repo: https://github.com/google/yamlfmt
52 |     rev: v0.16.0
53 |     hooks:
54 |       - id: yamlfmt
55 | 
56 |   - repo: https://github.com/ComPWA/taplo-pre-commit
57 |     rev: v0.9.3
58 |     hooks:
59 |       - id: taplo-format
60 |         args: ["--option", "column_width=100"]
61 | 


--------------------------------------------------------------------------------
/web/apps/web/app/models/[id]/page.tsx:
--------------------------------------------------------------------------------
 1 | import { notFound } from "next/navigation"
 2 | import Link from "next/link"
 3 | import { ArrowLeft } from "lucide-react"
 4 | import { getModel, getAllModels } from "@/lib/data-loader"
 5 | import { ModelTabs } from "./model-tabs"
 6 | 
 7 | export async function generateStaticParams() {
 8 |   const models = await getAllModels()
 9 |   return models.map((model) => ({
10 |     id: model.id,
11 |   }))
12 | }
13 | 
/**
 * Detail page for one model. Calls `notFound()` when the id is unknown.
 * The back link returns to `/models` when the page was reached with
 * `?from=models`, and to the home page otherwise.
 */
export default async function ModelDetailPage({
  params,
  searchParams
}: {
  params: Promise<{ id: string }>
  searchParams: Promise<{ from?: string }>
}) {
  const { id } = await params
  const { from } = await searchParams
  const model = await getModel(id)

  if (!model) {
    notFound()
  }

  // Back-link target and label both depend on where the user came from.
  const cameFromModels = from === 'models'
  const backHref = cameFromModels ? '/models' : '/'
  const backLabel = cameFromModels ? 'models' : 'home'

  return (
    <div className="container py-8">
      <div className="space-y-6">
        <div className="flex items-center gap-2">
          <Link
            href={backHref}
            className="text-sm text-muted-foreground hover:text-foreground"
          >
            <ArrowLeft className="h-4 w-4 inline mr-1" />
            Back to {backLabel}
          </Link>
        </div>

        <div>
          <h1 className="text-3xl font-bold mb-2">{model.name}</h1>
          {model.description && (
            <p className="text-muted-foreground">{model.description}</p>
          )}
        </div>

        <ModelTabs model={model} />
      </div>
    </div>
  )
}
54 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/avatar.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import * as React from "react"
 4 | import * as AvatarPrimitive from "@radix-ui/react-avatar"
 5 | 
 6 | import { cn } from "@flashinfer-bench/utils"
 7 | 
 8 | const Avatar = React.forwardRef<
 9 |   React.ElementRef<typeof AvatarPrimitive.Root>,
10 |   React.ComponentPropsWithoutRef<typeof AvatarPrimitive.Root>
11 | >(({ className, ...props }, ref) => (
12 |   <AvatarPrimitive.Root
13 |     ref={ref}
14 |     className={cn(
15 |       "relative flex h-10 w-10 shrink-0 overflow-hidden rounded-full",
16 |       className
17 |     )}
18 |     {...props}
19 |   />
20 | ))
21 | Avatar.displayName = AvatarPrimitive.Root.displayName
22 | 
/** Avatar image: square, filling the avatar root. */
const AvatarImage = React.forwardRef<
  React.ElementRef<typeof AvatarPrimitive.Image>,
  React.ComponentPropsWithoutRef<typeof AvatarPrimitive.Image>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const classes = cn("aspect-square h-full w-full", className)

  return <AvatarPrimitive.Image ref={forwardedRef} className={classes} {...rest} />
})
AvatarImage.displayName = AvatarPrimitive.Image.displayName
34 | 
/** Avatar fallback: centered content on a muted, rounded background. */
const AvatarFallback = React.forwardRef<
  React.ElementRef<typeof AvatarPrimitive.Fallback>,
  React.ComponentPropsWithoutRef<typeof AvatarPrimitive.Fallback>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const classes = cn(
    "flex h-full w-full items-center justify-center rounded-full bg-muted",
    className
  )

  return <AvatarPrimitive.Fallback ref={forwardedRef} className={classes} {...rest} />
})
AvatarFallback.displayName = AvatarPrimitive.Fallback.displayName
49 | 
50 | export { Avatar, AvatarImage, AvatarFallback }
51 | 


--------------------------------------------------------------------------------
/docs/api/build_docs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Build Sphinx documentation locally
 3 | #
 4 | # Usage:
 5 | #   ./build_docs.sh          # Build HTML docs
 6 | #   ./build_docs.sh clean    # Clean build directory
 7 | #   ./build_docs.sh deps     # Install dependencies
 8 | #   ./build_docs.sh serve    # Build and serve locally
 9 | 
set -euo pipefail

# Absolute paths, so the script behaves the same from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
BUILD_DIR="$SCRIPT_DIR/_build"

# requirements.txt and the Sphinx sources are referenced relative to this directory.
cd "$SCRIPT_DIR"

# Install the documentation requirements and the package itself (editable).
install_deps() {
    echo "Installing documentation dependencies..."
    pip install -r requirements.txt
    pip install -e "$REPO_ROOT"
}

# Remove the whole build directory.
clean() {
    echo "Cleaning build directory..."
    rm -rf "$BUILD_DIR"
}

# Build HTML documentation into $BUILD_DIR/html.
# -W turns warnings into errors; --keep-going still reports all of them.
build() {
    echo "Building HTML documentation..."
    sphinx-build -b html . "$BUILD_DIR/html" -W --keep-going
    echo ""
    echo "Documentation built successfully!"
    echo "Open: file://$BUILD_DIR/html/index.html"
}

# Build, then serve the HTML output on port 8000 (blocks until interrupted).
serve() {
    build
    echo ""
    echo "Serving documentation at http://localhost:8000"
    python -m http.server 8000 --directory "$BUILD_DIR/html"
}

# Print the supported sub-commands.
usage() {
    echo "Usage: $0 {build|clean|deps|serve}"
}

# "${1:-build}" already maps a missing or empty argument to "build",
# so no separate empty-string pattern is needed.
case "${1:-build}" in
    clean)
        clean
        ;;
    deps)
        install_deps
        ;;
    serve)
        serve
        ;;
    build)
        build
        ;;
    *)
        # Unknown sub-command: report on stderr and fail.
        usage >&2
        exit 1
        ;;
esac
66 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/moe.md:
--------------------------------------------------------------------------------
 1 | # moe
 2 | 
 3 | Mixture of Experts (MoE) divides computation among multiple expert subnetworks. In DeepSeek, the MoE layer replaces the transformer feed-forward block with a sparse mixture of many MLP experts, of which only a few are selected for each token by a gating network.
 4 | 
 5 | Axes (9 dimensions):
 6 | - `seq_len`: variable
 7 | - `num_experts`, `num_local_experts`, `hidden_size`, `intermediate_size`, `gemm1_out_size`, `num_hidden_blocks`, `num_intermediate_blocks`, `num_gemm1_out_blocks`: constant
 8 | 
 9 | Inputs (8 tensors + 2 scalars):
10 | - `routing_logits`: Tensor of routing logits for expert selection [seq_len, num_experts]
11 | - `routing_bias`: Bias tensor for routing. Pass all zeros for no bias [num_experts]
12 | - `hidden_states`: Input hidden states tensor (FP8 quantized) [seq_len, hidden_size]
13 | - `hidden_states_scale`: Block-wise scaling factors for hidden states [num_hidden_blocks, seq_len]
14 | - `gemm1_weights`: First GEMM weights for all local experts (gate and up projections) [num_local_experts, gemm1_out_size, hidden_size]
15 | - `gemm1_weights_scale`: Block-wise scaling factors for first GEMM weights [num_local_experts, num_gemm1_out_blocks, num_hidden_blocks]
16 | - `gemm2_weights`: Second GEMM weights for all local experts (down projection) [num_local_experts, hidden_size, intermediate_size]
17 | - `gemm2_weights_scale`: Block-wise scaling factors for second GEMM weights [num_local_experts, num_hidden_blocks, num_intermediate_blocks]
18 | - `local_expert_offset`: Offset of local experts in global expert space (scalar)
19 | - `routed_scaling_factor`: Scaling factor for routing weights (scalar)
20 | 
21 | Outputs (1 tensor):
22 | - `output`: Final MoE output tensor [seq_len, hidden_size]
23 | 


--------------------------------------------------------------------------------
/flashinfer_bench/__init__.py:
--------------------------------------------------------------------------------
 1 | from flashinfer_bench.apply import apply, disable_apply, enable_apply
 2 | from flashinfer_bench.bench import Benchmark, BenchmarkConfig
 3 | from flashinfer_bench.data import (
 4 |     AxisConst,
 5 |     AxisVar,
 6 |     BuildSpec,
 7 |     Correctness,
 8 |     Definition,
 9 |     Environment,
10 |     Evaluation,
11 |     EvaluationStatus,
12 |     Performance,
13 |     RandomInput,
14 |     SafetensorsInput,
15 |     Solution,
16 |     SourceFile,
17 |     SupportedLanguages,
18 |     TensorSpec,
19 |     Trace,
20 |     TraceSet,
21 |     Workload,
22 | )
23 | from flashinfer_bench.logging import configure_logging, get_logger
24 | from flashinfer_bench.tracing import (
25 |     TracingConfig,
26 |     disable_tracing,
27 |     enable_tracing,
28 |     get_tracing_runtime,
29 | )
30 | 
# Use the version exported by the private `_version` module when it can be
# imported (presumably generated by the build tooling — TODO confirm);
# otherwise fall back to a fixed development placeholder.
try:
    from ._version import __version__, __version_tuple__
except Exception:
    __version__ = "0.0.0.dev0"
    __version_tuple__ = (0, 0, 0, "dev0")

# Public API of the package, grouped by the submodule each name is imported from.
__all__ = [
    # Main classes
    "Benchmark",
    "BenchmarkConfig",
    # Apply API
    "apply",
    "enable_apply",
    "disable_apply",
    # Tracing API
    "enable_tracing",
    "get_tracing_runtime",
    "disable_tracing",
    "TracingConfig",
    # Data schema (re-exported from flashinfer_bench.data)
    "Definition",
    "Solution",
    "Trace",
    "TraceSet",
    # Definition types
    "AxisConst",
    "AxisVar",
    "TensorSpec",
    # Solution types
    "SourceFile",
    "BuildSpec",
    "SupportedLanguages",
    # Trace types
    "RandomInput",
    "SafetensorsInput",
    "Workload",
    "Correctness",
    "Performance",
    "Environment",
    "Evaluation",
    "EvaluationStatus",
    # Logging (re-exported from flashinfer_bench.logging)
    "configure_logging",
    "get_logger",
]
74 | 


--------------------------------------------------------------------------------
/web/apps/docs/app/layout.jsx:
--------------------------------------------------------------------------------
 1 | import './globals.css'
 2 | 
 3 | import { Inter } from 'next/font/google'
 4 | import { Layout } from 'nextra-theme-docs'
 5 | import { links } from '@flashinfer-bench/config'
 6 | import { Head, Search } from 'nextra/components'
 7 | import { SiteFooter, SiteHeader } from '@flashinfer-bench/ui'
 8 | import { getPageMap } from 'nextra/page-map'
 9 | import 'nextra-theme-docs/style.css'
10 | 
// Next.js page metadata for the docs app. Intentionally empty for now.
export const metadata = {
  // Define your metadata here
  // For more information on metadata API, see: https://nextjs.org/docs/app/building-your-application/optimizing/metadata
}

// Target of the header logo link; overridable through NEXT_PUBLIC_APP_HOME,
// falling back to the public site when the variable is null/undefined.
const APP_HOME = process.env.NEXT_PUBLIC_APP_HOME ?? 'https://bench.flashinfer.ai'

// Inter font (latin subset); its generated class name is applied to <body>.
const inter = Inter({ subsets: ['latin'] })

// Shared site header used as the docs navbar: no extra nav items,
// with the Nextra search box placed in the header's search slot.
const navbar = (
  <SiteHeader
    logoHref={APP_HOME}
    navItems={[]}
    searchSlot={<Search />}
  />
)
27 | export default async function RootLayout({ children }) {
28 |   return (
29 |     <html
30 |       // Not required, but good for SEO
31 |       lang="en"
32 |       // Required to be set
33 |       dir="ltr"
34 |       // Suggested by `next-themes` package https://github.com/pacocoursey/next-themes#with-app
35 |       suppressHydrationWarning
36 |     >
37 |       <Head
38 |       // ... Your additional head options
39 |       >
40 |         {/* Your additional tags should be passed as `children` of `<Head>` element */}
41 |       </Head>
42 |       <body className={inter.className}>
43 |         <Layout
44 |           navbar={navbar}
45 |           pageMap={await getPageMap()}
46 |           docsRepositoryBase={links.docsRepositoryBase}
47 |           footer={null}
48 |           // ... Your additional layout options
49 |         >
50 |           {children}
51 |         </Layout>
52 |         <SiteFooter />
53 |       </body>
54 |     </html>
55 |   )
56 | }
57 | 


--------------------------------------------------------------------------------
/flashinfer_bench/env.py:
--------------------------------------------------------------------------------
 1 | """Defines the environment variables used in FlashInfer-Bench."""
 2 | 
 3 | import os
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | def get_fib_enable_apply() -> bool:
 8 |     """Get the value of the FIB_ENABLE_APPLY environment variable. It controls whether the apply
 9 |     functionality is enabled.
10 | 
11 |     Returns
12 |     -------
13 |     bool
14 |         The value of the FIB_ENABLE_APPLY environment variable.
15 |     """
16 |     return os.environ.get("FIB_ENABLE_APPLY", "0") == "1"
17 | 
18 | 
def get_fib_enable_tracing() -> bool:
    """Report whether the tracing functionality is switched on.

    The switch is the ``FIB_ENABLE_TRACING`` environment variable; only the
    exact string ``"1"`` enables it. An unset variable is treated as ``"0"``.

    Returns
    -------
    bool
        ``True`` when ``FIB_ENABLE_TRACING`` equals ``"1"``, ``False`` otherwise.
    """
    raw_flag = os.getenv("FIB_ENABLE_TRACING", "0")
    return raw_flag == "1"
29 | 
30 | 
def get_fib_dataset_path() -> Path:
    """Resolve the directory of the dataset to dump or to load.

    The location is taken from the ``FIB_DATASET_PATH`` environment variable,
    with a leading ``~`` expanded to the user's home directory. When the
    variable is unset or empty, ``~/.cache/flashinfer_bench/dataset`` is used.

    Returns
    -------
    Path
        The configured dataset path, or the default location under the user's
        home directory.
    """
    value = os.environ.get("FIB_DATASET_PATH")
    if value:
        return Path(value).expanduser()
    # The `/` operator already yields a Path; no extra Path(...) wrap is
    # needed (kept consistent with get_fib_cache_path).
    return Path.home() / ".cache" / "flashinfer_bench" / "dataset"
44 | 
45 | 
def get_fib_cache_path() -> Path:
    """Resolve the cache directory.

    ``FIB_CACHE_PATH`` is used when it is set to a non-empty value (with ``~``
    expanded); otherwise the path falls back to
    ``~/.cache/flashinfer_bench/cache``.

    Returns
    -------
    Path
        The configured cache path, or the default location under the user's
        home directory.
    """
    override = os.environ.get("FIB_CACHE_PATH")
    if not override:
        return Path.home().joinpath(".cache", "flashinfer_bench", "cache")
    return Path(override).expanduser()
58 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/alert.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { cva, type VariantProps } from "class-variance-authority"
 3 | 
 4 | import { cn } from "@flashinfer-bench/utils"
 5 | 
// Class-name builder for <Alert>. The first argument is the base class list
// shared by every variant (layout, border, and positioning of a leading
// <svg> icon plus padding for its siblings); `variants.variant` adds the
// colour scheme, defaulting to "default".
const alertVariants = cva(
  "relative w-full rounded-lg border p-4 [&>svg~*]:pl-7 [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-foreground",
  {
    variants: {
      variant: {
        // Neutral alert using the page background/foreground colours.
        default: "bg-background text-foreground",
        // Error-style alert: destructive colour for border, text and icon.
        destructive:
          "border-destructive/50 text-destructive dark:border-destructive [&>svg]:text-destructive",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  }
)
21 | 
/**
 * Alert container. Renders a `<div role="alert">` styled by `alertVariants`
 * for the requested `variant`; caller class names are merged in via `cn`.
 */
const Alert = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement> & VariantProps<typeof alertVariants>
>((props, forwardedRef) => {
  const { className, variant, ...divProps } = props
  const mergedClassName = cn(alertVariants({ variant }), className)

  return (
    <div
      ref={forwardedRef}
      role="alert"
      className={mergedClassName}
      {...divProps}
    />
  )
})
Alert.displayName = "Alert"
34 | 
/**
 * Alert heading. Renders an `<h5>`; caller class names are merged in via `cn`.
 *
 * The forwarded ref is typed as `HTMLHeadingElement` so it matches the
 * element that is actually rendered (it was previously declared as a
 * paragraph element).
 */
const AlertTitle = React.forwardRef<
  HTMLHeadingElement,
  React.HTMLAttributes<HTMLHeadingElement>
>(({ className, ...props }, ref) => (
  <h5
    ref={ref}
    className={cn("mb-1 font-medium leading-none tracking-tight", className)}
    {...props}
  />
))
AlertTitle.displayName = "AlertTitle"
46 | 
/**
 * Alert body text. Renders a `<div>`; caller class names are merged in via `cn`.
 *
 * The forwarded ref and accepted attributes are typed for `HTMLDivElement`
 * so they match the element that is actually rendered (they were previously
 * declared for a paragraph element).
 */
const AlertDescription = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn("text-sm [&_p]:leading-relaxed", className)}
    {...props}
  />
))
AlertDescription.displayName = "AlertDescription"
58 | 
59 | export { Alert, AlertTitle, AlertDescription }
60 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/gqa_paged.md:
--------------------------------------------------------------------------------
 1 | # gqa_paged
 2 | 
 3 | Grouped Query Attention (GQA) with paged memory layout. GQA is an attention mechanism that reduces memory usage by grouping multiple query heads to share the same key-value heads, while using a paged memory system for efficient KV cache management. This allows for variable-length sequences and better memory utilization compared to traditional attention.
 4 | 
 5 | Variants:
 6 | - prefill
 7 | - decode
 8 | 
 9 | ## prefill
10 | 
11 | Axes (8 dimensions):
12 | - `total_q`, `num_pages`, `len_indptr`, `num_kv_indices`: variable
13 | - `num_qo_heads`, `num_kv_heads`, `head_dim`, `page_size`: constant
14 | 
15 | Inputs (6 tensors + 1 scalar):
16 | - `q`: query tensor [total_q, num_qo_heads, head_dim]
17 | - `k_cache`, `v_cache`: paged KV cache [num_pages, page_size, num_kv_heads, head_dim]
18 | - `qo_indptr`, `kv_indptr`, `kv_indices`: paging indices
19 | - `sm_scale`: softmax scale (scalar)
20 | 
21 | Outputs (2 tensors):
22 | - `output`: attention output [total_q, num_qo_heads, head_dim]
23 | - `lse`: log-sum-exp values [total_q, num_qo_heads]
24 | 
25 | Constraints:
26 | - `total_q == qo_indptr[-1]`
27 | - `num_kv_indices == kv_indptr[-1]`
28 | 
29 | ## decode
30 | 
31 | Axes (8 dimensions):
32 | - `total_q`, `num_pages`, `len_indptr`, `num_kv_indices`: variable
33 | - `num_qo_heads`, `num_kv_heads`, `head_dim`, `page_size`: constant
34 | 
35 | Inputs (5 tensors + 1 scalar):
36 | - `q`: query tensor [total_q, num_qo_heads, head_dim]
37 | - `k_cache`, `v_cache`: paged KV cache [num_pages, page_size, num_kv_heads, head_dim]
38 | - `kv_indptr`, `kv_indices`: paging indices
39 | - `sm_scale`: softmax scale (scalar)
40 | 
41 | Outputs (2 tensors):
42 | - `output`: attention output [total_q, num_qo_heads, head_dim]
43 | - `lse`: log-sum-exp values [total_q, num_qo_heads]
44 | 
45 | Constraints:
46 | - `len_indptr == num_pages + 1`
47 | - `num_kv_indices == kv_indptr[-1]`
48 | 


--------------------------------------------------------------------------------
/web/apps/docs/gen_api_docs.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | # Resolve repo root from this script's location: web/apps/doc/gen_api_docs.sh
 5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 6 | ROOT_DIR="$(cd "$SCRIPT_DIR/../../.." && pwd)"
 7 | 
 8 | cd "$ROOT_DIR"
 9 | 
10 | if ! command -v pydoc-markdown >/dev/null 2>&1; then
11 |   echo "pydoc-markdown is not installed. Install with:"
12 |   echo "  pip install pydoc-markdown>=4"
13 |   exit 1
14 | fi
15 | 
16 | echo "Generating API docs → docs/api/reference.md (from flashinfer_bench) ..."
17 | 
18 | # Build an explicit module list from the filesystem to include submodules
19 | mapfile -t MODULES < <( \
20 |   find flashinfer_bench -type f -name "*.py" \
21 |   | sed -e 's#^flashinfer_bench/##' -e 's#/__init__\.py$##' -e 's#\.py$##' \
22 |   | sed -e 's#/#.#g' \
23 |   | grep -vE '^__init__$' \
24 |   | awk 'NF{print "flashinfer_bench" (length($0)? "." $0:"")}' \
25 |   | sort -u \
26 | )
27 | 
28 | # Generate a temporary YAML config that enumerates all modules
29 | TMP_YML=$(mktemp -t pydocmd.XXXXXX.yml)
30 | OUT_MD="$ROOT_DIR/docs/api/reference.md"
31 | echo "Found ${#MODULES[@]} modules. Writing to $OUT_MD"
32 | {
33 |   echo "loaders:"
34 |   echo "  - type: python"
35 |   echo "    search_path: [\"$ROOT_DIR\"]"
36 |   echo "    modules:"
37 |   for m in "${MODULES[@]}"; do
38 |     echo "      - \"$m\""
39 |   done
40 |   echo "processors:"
41 |   echo "  - type: smart"
42 |   echo "renderer:"
43 |   echo "  type: markdown"
44 |   echo "  filename: $OUT_MD"
45 |   echo "  render_toc: false"
46 | } > "$TMP_YML"
47 | 
48 | set +e
49 | pydoc-markdown "$TMP_YML"
50 | STATUS=$?
51 | set -e
52 | rm -f "$TMP_YML"
53 | 
54 | if [ $STATUS -ne 0 ]; then
55 |   echo "Failed to generate API docs. Ensure flashinfer_bench is importable in this Python env." >&2
56 |   echo "Try: pip install -e .  and re-run." >&2
57 |   exit $STATUS
58 | fi
59 | 
60 | echo "Done."
61 | 


--------------------------------------------------------------------------------
/tests/test_logging.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import List
 3 | 
 4 | import pytest
 5 | 
 6 | import flashinfer_bench.logging as fib_logging
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def package_logger_state() -> logging.Logger:
11 |     logger = logging.getLogger("flashinfer-bench")
12 |     original_level = logger.level
13 |     original_handlers: List[logging.Handler] = list(logger.handlers)
14 |     original_propagate = logger.propagate
15 | 
16 |     yield logger
17 | 
18 |     logger.handlers.clear()
19 |     for handler in original_handlers:
20 |         logger.addHandler(handler)
21 |     logger.setLevel(original_level)
22 |     logger.propagate = original_propagate
23 | 
24 | 
def test_get_logger_scopes_name(package_logger_state: logging.Logger) -> None:
    """The default logger is the package logger; named loggers nest beneath it."""
    default_name = fib_logging.get_logger().name
    assert default_name == "flashinfer-bench"

    scoped_name = fib_logging.get_logger("bench.tests").name
    assert scoped_name == "flashinfer-bench.bench.tests"
31 | 
32 | 
def test_configure_logging_with_custom_handler(package_logger_state: logging.Logger) -> None:
    """configure_logging installs the given handler/formatter and applies the level."""
    captured = []

    class CollectHandler(logging.Handler):
        """Handler that stores every emitted record in ``captured``."""

        def emit(self, record: logging.LogRecord) -> None:
            captured.append(record)

    collector = CollectHandler()
    fmt = logging.Formatter("%(levelname)s:%(message)s")

    configured = fib_logging.configure_logging(
        level="warning", handler=collector, formatter=fmt
    )
    configured.warning("hello")

    assert configured.level == logging.WARNING
    assert configured.handlers == [collector]
    assert collector.formatter is fmt
    # At least one record was captured and the latest one is our message.
    assert captured
    assert captured[-1].getMessage() == "hello"
    assert not configured.propagate

    collector.close()
54 | 
55 | 
def test_configure_logging_rejects_unknown_level(package_logger_state: logging.Logger) -> None:
    """An unrecognised level name must be rejected with ValueError."""
    bogus_level = "LOUD"
    with pytest.raises(ValueError):
        fib_logging.configure_logging(level=bogus_level)
59 | 


--------------------------------------------------------------------------------
/licenses/cutlass.LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 2 | SPDX-License-Identifier: BSD-3-Clause
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright notice, this
 8 | list of conditions and the following disclaimer.
 9 | 
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 | 
14 | 3. Neither the name of the copyright holder nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 
29 | Certain files within this repository are subject to separate licensing terms:
30 | 
31 | - The files located in the `python/CuTeDSL` directory are licensed under the
32 |   NVIDIA End User License Agreement (EULA). Please refer to
33 |   https://docs.nvidia.com/cutlass/media/docs/pythonDSL/license.html
34 |   for the full terms.
35 | 


--------------------------------------------------------------------------------
/tests/apply/test_key.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import json
 4 | import sys
 5 | 
 6 | import pytest
 7 | 
 8 | from flashinfer_bench.apply.key import ApplyKey, ApplyKeyFactory
 9 | from flashinfer_bench.data import AxisConst, AxisVar, Definition, TensorSpec
10 | 
11 | 
class FakeTensor:
    """Minimal tensor stand-in that only exposes a ``shape`` tuple."""

    def __init__(self, shape):
        # Normalise any iterable of dimensions into an immutable tuple.
        dims = tuple(shape)
        self.shape = dims
15 | 
16 | 
def make_minimal_def() -> Definition:
    """Build a small ``add`` definition with one variable axis (M) and one constant axis (N=2).

    Inputs ``X``/``Y`` and output ``Z`` are all float32 tensors of shape [M, N].
    """

    def mn_float32() -> TensorSpec:
        # A fresh spec per tensor so no instance is shared between fields.
        return TensorSpec(shape=["M", "N"], dtype="float32")

    axes = {"M": AxisVar(), "N": AxisConst(value=2)}
    inputs = {"X": mn_float32(), "Y": mn_float32()}
    outputs = {"Z": mn_float32()}

    return Definition(
        name="add",
        op_type="op",
        axes=axes,
        inputs=inputs,
        outputs=outputs,
        reference="def run(X, Y):\n    return X\n",
    )
29 | 
30 | 
def test_applykey_json_roundtrip():
    """An ApplyKey survives a JSON dump/validate round trip with equal value and hash."""
    original = ApplyKey(axes=(("M", 2), ("N", 4)), feats=(("avg", 1.5), ("flag", True)))
    payload = original.model_dump_json()

    # The dump must parse as JSON.
    json.loads(payload)

    restored = ApplyKey.model_validate_json(payload)
    assert restored == original
    assert hash(restored) == hash(original)
39 | 
40 | 
def test_axes_only_key_builder_materializes_axes():
    """The specialised builder records variable axes only and rejects too-low-rank inputs."""
    definition = make_minimal_def()
    key_builder = ApplyKeyFactory.specialize(definition)

    # Valid runtime args (positional): both tensors are 4 x 2.
    runtime_args = (FakeTensor((4, 2)), FakeTensor((4, 2)))
    key = key_builder.build_from_args(runtime_args)

    # Only var axes are materialized; const axes are not included in key.
    materialized = dict(key.axes)
    assert materialized == {"M": 4}
    assert materialized.get("M") == 4
    assert "N" not in materialized

    # Rank too small: X needs at least one dimension (index 0), so a 0-dim
    # tensor must raise.
    bad_args = (FakeTensor(()), FakeTensor((4, 2)))
    with pytest.raises((ValueError, IndexError)):
        key_builder.build_from_args(bad_args)
55 | 
56 | 
# Allow running this test file directly; command-line arguments (including
# the script path in argv[0]) are handed to pytest unchanged.
if __name__ == "__main__":
    pytest.main(sys.argv)
59 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/progress-circle.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import React from "react"
 4 | 
/** Props accepted by `ProgressCircle`. */
interface ProgressCircleProps {
  /** Current progress amount; shown as the numerator when `showText` is set. */
  value: number
  /** Amount that represents a full circle; shown as the denominator when `showText` is set. */
  max: number
  /** Width and height of the SVG (defaults to 40). */
  size?: number
  /** Stroke width of both circles (defaults to 3). */
  strokeWidth?: number
  /** Extra class names appended to the wrapper element (defaults to ""). */
  className?: string
  /** When true, renders a centered "value/max" label (defaults to false). */
  showText?: boolean
}
13 | 
/**
 * Circular progress indicator drawn as two stacked SVG circles: a full
 * background ring and a progress ring whose visible arc is `value / max`.
 *
 * The drawn fraction is clamped to the 0–100 % range, so out-of-range input
 * (`value > max`, negative values) never produces an invalid dash offset;
 * a non-positive `max` or a NaN ratio is drawn as 0 %. The optional text label
 * still shows the raw `value/max`.
 */
export function ProgressCircle({
  value,
  max,
  size = 40,
  strokeWidth = 3,
  className = "",
  showText = false
}: ProgressCircleProps) {
  const rawPercentage = max > 0 ? (value / max) * 100 : 0
  // Clamp to [0, 100]; NaN (e.g. from a NaN `value`) is treated as no progress.
  const percentage = Number.isNaN(rawPercentage)
    ? 0
    : Math.min(100, Math.max(0, rawPercentage))
  // The ring is inset by half the stroke so it stays inside the SVG box;
  // never let the radius go negative when the stroke exceeds the size.
  const radius = Math.max(0, (size - strokeWidth) / 2)
  const circumference = radius * 2 * Math.PI
  // A dash as long as the circumference, shifted by the part that should stay hidden.
  const strokeDashoffset = circumference - (percentage / 100) * circumference

  return (
    <div className={`relative inline-flex items-center justify-center ${className}`}>
      {/* Rotated -90° so the arc starts at 12 o'clock instead of 3 o'clock */}
      <svg
        width={size}
        height={size}
        className="transform -rotate-90"
      >
        {/* Background circle */}
        <circle
          cx={size / 2}
          cy={size / 2}
          r={radius}
          stroke="currentColor"
          strokeWidth={strokeWidth}
          fill="none"
          className="text-gray-200 dark:text-gray-700"
        />
        {/* Progress circle */}
        <circle
          cx={size / 2}
          cy={size / 2}
          r={radius}
          stroke="currentColor"
          strokeWidth={strokeWidth}
          fill="none"
          strokeDasharray={circumference}
          strokeDashoffset={strokeDashoffset}
          strokeLinecap="round"
          className="text-blue-400 transition-all duration-300"
        />
      </svg>
      {showText && (
        <div className="absolute inset-0 flex items-center justify-center">
          <span className="text-xs font-medium">
            {value}/{max}
          </span>
        </div>
      )}
    </div>
  )
}
68 | 


--------------------------------------------------------------------------------
/web/packages/ui/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@flashinfer-bench/ui",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "main": "src/index.ts",
 6 |   "types": "src/index.ts",
 7 |   "license": "UNLICENSED",
 8 |   "sideEffects": false,
 9 |   "exports": {
10 |     ".": {
11 |       "types": "./src/index.ts",
12 |       "import": "./src/index.ts"
13 |     },
14 |     "./brand/Logo": "./src/brand/Logo.tsx"
15 |   },
16 |   "peerDependencies": {
17 |     "next": "^15",
18 |     "react": "^18 || ^19",
19 |     "react-dom": "^18 || ^19",
20 |     "class-variance-authority": "^0.7.1"
21 |   },
22 |   "peerDependenciesMeta": {
23 |     "class-variance-authority": { "optional": true },
24 |     "@radix-ui/react-avatar": { "optional": true },
25 |     "@radix-ui/react-dialog": { "optional": true },
26 |     "@radix-ui/react-dropdown-menu": { "optional": true },
27 |     "@radix-ui/react-hover-card": { "optional": true },
28 |     "@radix-ui/react-label": { "optional": true },
29 |     "@radix-ui/react-select": { "optional": true },
30 |     "@radix-ui/react-separator": { "optional": true },
31 |     "@radix-ui/react-slot": { "optional": true },
32 |     "lucide-react": { "optional": true },
33 |     "@tanstack/react-query": { "optional": true },
34 |     "next-themes": { "optional": true }
35 |   },
36 |   "dependencies": {
37 |     "@flashinfer-bench/utils": "workspace:*"
38 |   },
39 |   "devDependencies": {
40 |     "@types/react": "^19.1.13",
41 |     "@types/react-dom": "^19.1.9",
42 |     "@radix-ui/react-avatar": "^1.1.10",
43 |     "@radix-ui/react-dialog": "^1.1.14",
44 |     "@radix-ui/react-dropdown-menu": "^2.1.15",
45 |     "@radix-ui/react-hover-card": "^1.1.14",
46 |     "@radix-ui/react-label": "^2.1.7",
47 |     "@radix-ui/react-select": "^2.2.5",
48 |     "@radix-ui/react-separator": "^1.1.7",
49 |     "@radix-ui/react-slot": "^1.2.3",
50 |     "@radix-ui/react-tabs": "^1.1.12",
51 |     "@radix-ui/react-toast": "^1.2.14",
52 |     "lucide-react": "^0.525.0",
53 |     "@tanstack/react-query": "^5.81.5",
54 |     "next-themes": "^0.4.6"
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------
/web/apps/web/app/models.tsx:
--------------------------------------------------------------------------------
 1 | import Link from "next/link"
 2 | import { Button, Card, CardContent, CardHeader } from "@flashinfer-bench/ui"
 3 | import { ArrowRight } from "lucide-react"
 4 | import { ModelCard } from "@/components/model-card"
 5 | import type { Model } from "@/lib/schemas"
 6 | 
/** Props for `ModelsSection`. */
type ModelsSectionProps = {
  /** Models to render as cards; an empty list renders placeholder cards instead. */
  models: Model[]
}
10 | 
/**
 * "Models" section: a heading, an optional "View all" link (shown when more
 * than three models are passed), and a responsive grid with one card per
 * model. With no models, three pulsing placeholder cards are rendered.
 */
export function ModelsSection({ models }: ModelsSectionProps) {
  const modelCount = models.length
  const showViewAll = modelCount > 3
  const isEmpty = modelCount === 0
  const placeholderIds = [1, 2, 3]

  const modelCards = models.map((entry) => {
    const detailHref = `/models/${entry.id}`
    return <ModelCard key={entry.id} model={entry} href={detailHref} />
  })

  return (
    <section className="container space-y-6 py-8 md:py-12">
      <div className="flex items-center justify-between">
        <div className="space-y-1">
          <h2 className="text-3xl font-bold tracking-tight">Models</h2>
          <p className="text-muted-foreground">
            Explore model architectures and their kernel implementations
          </p>
        </div>
        {showViewAll && (
          <Button asChild variant="ghost">
            <Link href="/models">
              View all <ArrowRight className="ml-2 h-4 w-4" />
            </Link>
          </Button>
        )}
      </div>
      <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-3">
        {modelCards}
        {isEmpty && (
          <>
            {placeholderIds.map((placeholderId) => (
              <Card key={placeholderId} className="animate-pulse">
                <CardHeader>
                  <div className="h-5 w-32 rounded bg-muted" />
                  <div className="mt-2 h-4 w-48 rounded bg-muted" />
                </CardHeader>
                <CardContent>
                  <div className="h-4 w-24 rounded bg-muted" />
                </CardContent>
              </Card>
            ))}
          </>
        )}
      </div>
    </section>
  )
}
58 | 


--------------------------------------------------------------------------------
/web/apps/web/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@flashinfer-bench/web",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "scripts": {
 6 |     "prebuild": "node scripts/prebuild.mjs",
 7 |     "dev": "next dev",
 8 |     "build": "next build",
 9 |     "start": "next start",
10 |     "lint": "next lint"
11 |   },
12 |   "dependencies": {
13 |     "@flashinfer-bench/config": "workspace:*",
14 |     "@flashinfer-bench/ui": "workspace:*",
15 |     "@flashinfer-bench/utils": "workspace:*",
16 |     "@monaco-editor/react": "^4.7.0",
17 |     "@radix-ui/react-avatar": "^1.1.10",
18 |     "@radix-ui/react-dialog": "^1.1.14",
19 |     "@radix-ui/react-dropdown-menu": "^2.1.15",
20 |     "@radix-ui/react-hover-card": "^1.1.14",
21 |     "@radix-ui/react-label": "^2.1.7",
22 |     "@radix-ui/react-select": "^2.2.5",
23 |     "@radix-ui/react-separator": "^1.1.7",
24 |     "@radix-ui/react-slot": "^1.2.3",
25 |     "@radix-ui/react-tabs": "^1.1.12",
26 |     "@radix-ui/react-toast": "^1.2.14",
27 |     "@react-three/drei": "^10.6.1",
28 |     "@react-three/fiber": "^9.3.0",
29 |     "@tanstack/react-query": "^5.81.5",
30 |     "@types/d3": "^7.4.3",
31 |     "@types/three": "^0.179.0",
32 |     "@vercel/analytics": "^1.5.0",
33 |     "ajv": "^8.17.1",
34 |     "class-variance-authority": "^0.7.1",
35 |     "clsx": "^2.1.1",
36 |     "d3": "^7.9.0",
37 |     "lucide-react": "^0.525.0",
38 |     "next": "^15.3.4",
39 |     "next-themes": "^0.4.6",
40 |     "react": "^19.1.0",
41 |     "react-dom": "^19.1.0",
42 |     "reactflow": "^11.11.4",
43 |     "strip-json-comments": "^5.0.2",
44 |     "tailwind-merge": "^3.3.1",
45 |     "tailwindcss-animate": "^1.0.7",
46 |     "three": "^0.179.1",
47 |     "zod": "^3.25.67",
48 |     "zustand": "^5.0.6"
49 |   },
50 |   "devDependencies": {
51 |     "@tailwindcss/postcss": "^4.1.11",
52 |     "@types/node": "^24.0.8",
53 |     "@types/react": "^19.1.8",
54 |     "@types/react-dom": "^19.1.6",
55 |     "eslint": "^8",
56 |     "eslint-config-next": "^15.3.4",
57 |     "postcss": "^8.5.6",
58 |     "tailwindcss": "^4.1.11",
59 |     "typescript": "^5.8.3"
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/integration/test_patch_manager.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | 
 3 | from flashinfer_bench.integration.patch_manager import PatchManager, PatchSpec
 4 | 
 5 | 
 6 | def test_patch_manager_function_patch_and_unpatch():
 7 |     # Ensure importable
 8 |     mod = importlib.import_module("tests.integration.samplemods.pm_dummy")
 9 | 
10 |     pm = PatchManager()
11 | 
12 |     spec = PatchSpec(
13 |         path="tests.integration.samplemods.pm_dummy.module_function",
14 |         kind="function",
15 |         name="ut_patch",
16 |     )
17 | 
18 |     called = {"wrapped": False}
19 | 
20 |     def wf(spec, orig):
21 |         def wrapped(a, b=3):
22 |             called["wrapped"] = True
23 |             return ("wrapped", orig(a, b))
24 | 
25 |         return wrapped
26 | 
27 |     assert pm.patch(spec, wf) is True
28 |     # Patched behavior
29 |     assert mod.module_function(4) == ("wrapped", 12)
30 |     assert called["wrapped"] is True
31 | 
32 |     pm.unpatch_all()
33 |     # Original behavior restored
34 |     assert mod.module_function(4) == 12
35 | 
36 | 
37 | def test_patch_manager_method_patch_idempotent():
38 |     mod = importlib.import_module("tests.integration.samplemods.pm_dummy")
39 | 
40 |     pm = PatchManager()
41 |     spec = PatchSpec(
42 |         path="tests.integration.samplemods.pm_dummy.Foo.instance_method",
43 |         kind="method",
44 |         name="ut_patch_method",
45 |     )
46 | 
47 |     def wf(spec, orig):
48 |         def wrapped(self, x, y=2):
49 |             return ("meth", orig(self, x, y))
50 | 
51 |         return wrapped
52 | 
53 |     # First patch returns True; second should be idempotent and also True
54 |     assert pm.patch(spec, wf) is True
55 |     assert pm.patch(spec, wf) is True
56 | 
57 |     f = mod.Foo()
58 |     assert f.instance_method(5) == ("meth", 7)
59 | 
60 |     pm.unpatch_all()
61 |     assert f.instance_method(5) == 7
62 | 
63 | 
64 | def test_patch_manager_missing_target_returns_false():
65 |     pm = PatchManager()
66 |     spec = PatchSpec(path="non.existent.module.symbol", kind="function", name="x")
67 |     assert pm.patch(spec, lambda s, o: o) is False
68 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/button.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { Slot } from "@radix-ui/react-slot"
 3 | import { cva, type VariantProps } from "class-variance-authority"
 4 | 
 5 | import { cn } from "@flashinfer-bench/utils"
 6 | 
 7 | const buttonVariants = cva(
 8 |   "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-hidden focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50",
 9 |   {
10 |     variants: {
11 |       variant: {
12 |         default: "bg-primary text-primary-foreground hover:bg-primary/90",
13 |         destructive:
14 |           "bg-destructive text-destructive-foreground hover:bg-destructive/90",
15 |         outline:
16 |           "border border-input bg-background hover:bg-accent hover:text-accent-foreground",
17 |         secondary:
18 |           "bg-secondary text-secondary-foreground hover:bg-secondary/80",
19 |         ghost: "hover:bg-accent hover:text-accent-foreground",
20 |         link: "text-primary underline-offset-4 hover:underline",
21 |       },
22 |       size: {
23 |         default: "h-10 px-4 py-2",
24 |         sm: "h-9 rounded-md px-3",
25 |         lg: "h-11 rounded-md px-8",
26 |         icon: "h-10 w-10",
27 |       },
28 |     },
29 |     defaultVariants: {
30 |       variant: "default",
31 |       size: "default",
32 |     },
33 |   }
34 | )
35 | 
36 | export interface ButtonProps
37 |   extends React.ButtonHTMLAttributes<HTMLButtonElement>,
38 |     VariantProps<typeof buttonVariants> {
39 |   asChild?: boolean
40 | }
41 | 
42 | const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
43 |   ({ className, variant, size, asChild = false, ...props }, ref) => {
44 |     const Comp = asChild ? Slot : "button"
45 |     return (
46 |       <Comp
47 |         className={cn(buttonVariants({ variant, size, className }))}
48 |         ref={ref}
49 |         {...props}
50 |       />
51 |     )
52 |   }
53 | )
54 | Button.displayName = "Button"
55 | 
56 | export { Button, buttonVariants }
57 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/brand/Logo.tsx:
--------------------------------------------------------------------------------
 1 | import type { StaticImageData } from "next/image"
 2 | 
 3 | import fibBlackBg from "./fib-black-bg.png"
 4 | import fibWhiteBg from "./fib-white-bg.png"
 5 | 
 6 | import { cn } from "@flashinfer-bench/utils"
 7 | 
 8 | type LogoVariant = "auto" | "light" | "dark"
 9 | 
10 | export interface LogoProps {
11 |   className?: string
12 |   /**
13 |    * Controls which asset to render. Use `light` for light backgrounds,
14 |    * `dark` for dark backgrounds, or `auto` to follow user preferences.
15 |    */
16 |   variant?: LogoVariant
17 |   /** Accessible label for the logo. */
18 |   alt?: string
19 |   /** Applies directly to the rendered `<img>` element. */
20 |   imgClassName?: string
21 |   /** Pixel height for the image (defaults to 36px). */
22 |   height?: number
23 | }
24 | 
25 | const toSrc = (image: StaticImageData | string) => // normalize either form to a URL string
26 |   typeof image !== "string" ? image.src : image
27 | 
28 | const LIGHT_LOGO_SRC = toSrc(fibWhiteBg)
29 | const DARK_LOGO_SRC = toSrc(fibBlackBg)
30 | 
31 | export function Logo({
32 |   className,
33 |   variant = "auto",
34 |   alt = "FlashInfer Bench",
35 |   imgClassName,
36 |   height = 36,
37 | }: LogoProps) {
38 |   const wrapperClasses = cn("inline-flex items-center", className)
39 | 
40 |   const renderImg = (src: string, ariaHidden = false) => (
41 |     <img
42 |       src={src}
43 |       alt={ariaHidden ? "" : alt}
44 |       aria-hidden={ariaHidden || undefined}
45 |       className={cn("block", imgClassName)}
46 |       loading="eager"
47 |       decoding="async"
48 |       style={{ height, width: "auto" }}
49 |     />
50 |   )
51 | 
52 |   if (variant === "light") {
53 |     return <span className={wrapperClasses}>{renderImg(LIGHT_LOGO_SRC)}</span>
54 |   }
55 | 
56 |   if (variant === "dark") {
57 |     return <span className={wrapperClasses}>{renderImg(DARK_LOGO_SRC)}</span>
58 |   }
59 | 
60 |   return (
61 |     <span className={wrapperClasses}>
62 |       <picture>
63 |         <source srcSet={DARK_LOGO_SRC} media="(prefers-color-scheme: dark)" />
64 |         {renderImg(LIGHT_LOGO_SRC)}
65 |       </picture>
66 |     </span>
67 |   )
68 | }
69 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/tabs.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import * as React from "react"
 4 | import * as TabsPrimitive from "@radix-ui/react-tabs"
 5 | 
 6 | import { cn } from "@flashinfer-bench/utils"
 7 | 
 8 | const Tabs = TabsPrimitive.Root
 9 | 
10 | const TabsList = React.forwardRef<
11 |   React.ElementRef<typeof TabsPrimitive.List>,
12 |   React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
13 | >(({ className, ...props }, ref) => (
14 |   <TabsPrimitive.List
15 |     ref={ref}
16 |     className={cn(
17 |       "inline-flex h-10 items-center justify-center rounded-md bg-muted p-1 text-muted-foreground",
18 |       className
19 |     )}
20 |     {...props}
21 |   />
22 | ))
23 | TabsList.displayName = TabsPrimitive.List.displayName
24 | 
25 | const TabsTrigger = React.forwardRef<
26 |   React.ElementRef<typeof TabsPrimitive.Trigger>,
27 |   React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
28 | >(({ className, ...props }, ref) => (
29 |   <TabsPrimitive.Trigger
30 |     ref={ref}
31 |     className={cn(
32 |       "inline-flex items-center justify-center whitespace-nowrap rounded-sm px-3 py-1.5 text-sm font-medium ring-offset-background transition-all focus-visible:outline-hidden focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 data-[state=active]:bg-background data-[state=active]:text-foreground data-[state=active]:shadow-xs",
33 |       className
34 |     )}
35 |     {...props}
36 |   />
37 | ))
38 | TabsTrigger.displayName = TabsPrimitive.Trigger.displayName
39 | 
40 | const TabsContent = React.forwardRef<
41 |   React.ElementRef<typeof TabsPrimitive.Content>,
42 |   React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
43 | >(({ className, ...props }, ref) => (
44 |   <TabsPrimitive.Content
45 |     ref={ref}
46 |     className={cn(
47 |       "mt-2 ring-offset-background focus-visible:outline-hidden focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2",
48 |       className
49 |     )}
50 |     {...props}
51 |   />
52 | ))
53 | TabsContent.displayName = TabsPrimitive.Content.displayName
54 | 
55 | export { Tabs, TabsList, TabsTrigger, TabsContent }
56 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/card.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | 
 3 | import { cn } from "@flashinfer-bench/utils"
 4 | 
 5 | const Card = React.forwardRef<
 6 |   HTMLDivElement,
 7 |   React.HTMLAttributes<HTMLDivElement>
 8 | >(({ className, ...props }, ref) => (
 9 |   <div
10 |     ref={ref}
11 |     className={cn(
12 |       "rounded-lg border bg-card text-card-foreground shadow-xs",
13 |       className
14 |     )}
15 |     {...props}
16 |   />
17 | ))
18 | Card.displayName = "Card"
19 | 
20 | const CardHeader = React.forwardRef<
21 |   HTMLDivElement,
22 |   React.HTMLAttributes<HTMLDivElement>
23 | >(({ className, ...props }, ref) => (
24 |   <div ref={ref} className={cn("flex flex-col space-y-1.5 p-6", className)} {...props} />
25 | ))
26 | CardHeader.displayName = "CardHeader"
27 | 
28 | const CardTitle = React.forwardRef<
29 |   HTMLHeadingElement, // ref type matches the rendered <h3> element
30 |   React.HTMLAttributes<HTMLHeadingElement>
31 | >(({ className, ...props }, ref) => (
32 |   <h3
33 |     ref={ref}
34 |     className={cn(
35 |       "text-2xl font-semibold leading-none tracking-tight",
36 |       className
37 |     )}
38 |     {...props}
39 |   />
40 | ))
41 | CardTitle.displayName = "CardTitle"
42 | 
43 | const CardDescription = React.forwardRef<
44 |   HTMLParagraphElement,
45 |   React.HTMLAttributes<HTMLParagraphElement>
46 | >(({ className, ...props }, ref) => (
47 |   <p
48 |     ref={ref}
49 |     className={cn("text-sm text-muted-foreground", className)}
50 |     {...props}
51 |   />
52 | ))
53 | CardDescription.displayName = "CardDescription"
54 | 
55 | const CardContent = React.forwardRef<
56 |   HTMLDivElement,
57 |   React.HTMLAttributes<HTMLDivElement>
58 | >(({ className, ...props }, ref) => (
59 |   <div ref={ref} className={cn("p-6 pt-0", className)} {...props} />
60 | ))
61 | CardContent.displayName = "CardContent"
62 | 
63 | const CardFooter = React.forwardRef<
64 |   HTMLDivElement,
65 |   React.HTMLAttributes<HTMLDivElement>
66 | >(({ className, ...props }, ref) => (
67 |   <div ref={ref} className={cn("flex items-center p-6 pt-0", className)} {...props} />
68 | ))
69 | CardFooter.displayName = "CardFooter"
70 | 
71 | export { Card, CardHeader, CardFooter, CardTitle, CardDescription, CardContent }
72 | 


--------------------------------------------------------------------------------
/flashinfer_bench/integration/flashinfer/adapters/rmsnorm.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from typing import Any, Callable, Dict, List
 4 | 
 5 | import torch
 6 | 
 7 | from flashinfer_bench.apply import apply
 8 | from flashinfer_bench.integration.patch_manager import PatchSpec
 9 | from flashinfer_bench.integration.utils import ArgBinder
10 | 
11 | 
12 | def _def_name_resolver(weight):
13 |     return f"fused_add_rmsnorm_h{weight.shape[0]}"
14 | 
15 | 
16 | class RMSNormAdapter:
17 |     """Adapter for flashinfer.norm.fused_add_rmsnorm."""
18 | 
19 |     def targets(self) -> List[PatchSpec]:
20 |         return [
21 |             PatchSpec(
22 |                 path="flashinfer.norm.fused_add_rmsnorm",
23 |                 kind="function",
24 |                 name="fused_add_rmsnorm",
25 |                 ctx_key="rmsnorm",
26 |             )
27 |         ]
28 | 
29 |     def make_wrapper(self, spec: PatchSpec, orig: Callable[..., Any]) -> Callable[..., Any]:
30 |         binder = ArgBinder.from_callable(orig)
31 | 
32 |         def wrapper(*args, **kwargs):
33 |             bound = binder.bind(args, kwargs)
34 |             input_tensor: torch.Tensor = bound["input"]
35 |             residual: torch.Tensor = bound["residual"]
36 |             weight: torch.Tensor = bound["weight"]
37 | 
38 |             # Compatibility checks
39 |             if (
40 |                 input_tensor.dtype != torch.bfloat16
41 |                 or residual.dtype != torch.bfloat16
42 |                 or weight.dtype != torch.bfloat16
43 |             ):
44 |                 return orig(*args, **kwargs)
45 |             if input_tensor.shape != residual.shape or input_tensor.shape[1] != weight.shape[0]:
46 |                 return orig(*args, **kwargs)
47 | 
48 |             def_name = _def_name_resolver(weight)
49 |             rk: Dict[str, Any] = {
50 |                 "hidden_states": input_tensor,
51 |                 "residual": residual,
52 |                 "weight": weight,
53 |             }
54 | 
55 |             def _fb(**_rk):
56 |                 return orig(*args, **kwargs)
57 | 
58 |             ret = apply(def_name, kwargs=rk, fallback=_fb)
59 |             return ret
60 | 
61 |         return wrapper
62 | 


--------------------------------------------------------------------------------
/docs/flashinfer_trace/flashinfer_trace.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | license: apache-2.0
 3 | ---
 4 | 
 5 | # FlashInfer Trace Schema
 6 | 
 7 | We organize the FlashInfer-Bench dataset into the following three core components:
 8 | 
 9 | # Definition
10 | 
11 | This component provides a formal definition for a specific computational workload encountered in a model's forward pass. It specifies the expected input and output formats. We also include a mathematical specification of the workload in the form of PyTorch code. This serves as both a precise description of the computation and a standard reference implementation.
12 | 
13 | The Definition directly guides the subsequent Solution and Trace components.
14 | 
15 | **Formal Specification:** [Definition](definition.md)
16 | 
17 | 
18 | # Solution
19 | 
20 | 
21 | This component represents a single, high-performance solution implementation of a given Definition, contributed by either human experts or autonomous agent systems. A solution must strictly adhere to the corresponding Definition, including input/output shapes and constant values. Its computation must be functionally equivalent to the mathematical specification.
22 | 
23 | The implementation is not restricted to any specific language, framework, or platform, but it must provide an entry-point function with a strictly matching signature. Once submitted, solutions are benchmarked to generate a Trace. By applying pre-collected input data to the entry point, we verify its correctness and measure its performance metrics.
24 | 
25 | **Formal Specification:** [Solution](solution.md)
26 | 
27 | 
28 | # Trace
29 | 
30 | This component is an atomic and immutable record of a single benchmark run of a Solution. A Trace serves as a detailed log entry, precisely linking a Solution to a Definition for a specific workload configuration (i.e., concrete shapes and input data), and contains the complete evaluation result.
31 | 
32 | The collection of Traces is the central artifact of the FlashInfer-Bench ecosystem, creating a complete, queryable performance database that enables both high-level analysis and the programmatic discovery of the optimal Solution for any given Definition and environment.
33 | 
34 | **Formal Specification:** [Trace](trace.md)
35 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/site-footer.tsx:
--------------------------------------------------------------------------------
 1 | import Link from "next/link"
 2 | 
 3 | const GITHUB_URL = "https://github.com/flashinfer-ai"
 4 | 
 5 | export function SiteFooter() {
 6 |   return (
 7 |     <footer className="border-t bg-background">
 8 |       <div className="container flex flex-col items-center justify-between gap-4 py-10 md:h-24 md:flex-row md:py-0">
 9 |         <div className="flex flex-col items-center gap-4 px-8 md:flex-row md:gap-2 md:px-0">
10 |           <p className="text-center text-sm leading-loose text-muted-foreground md:text-left">
11 |             Built by the FlashInfer community.
12 |           </p>
13 |         </div>
14 |         <div className="flex items-center space-x-1">
15 |           <Link
16 |             href={GITHUB_URL}
17 |             target="_blank"
18 |             rel="noreferrer"
19 |             className="inline-flex h-9 items-center justify-center rounded-md px-3 py-2 text-sm font-medium transition-colors hover:bg-accent hover:text-accent-foreground focus-visible:outline-hidden focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 ring-offset-background"
20 |           >
21 |             <span className="sr-only">GitHub</span>
22 |             <svg
23 |               aria-hidden="true"
24 |               viewBox="0 0 24 24"
25 |               className="h-4 w-4"
26 |               fill="currentColor"
27 |             >
28 |               <path d="M12 0C5.37 0 0 5.37 0 12c0 5.3 3.438 9.8 8.207 11.387.6.113.82-.262.82-.582 0-.288-.012-1.244-.018-2.256-3.338.726-4.042-1.61-4.042-1.61-.546-1.386-1.332-1.756-1.332-1.756-1.09-.746.082-.73.082-.73 1.205.084 1.84 1.236 1.84 1.236 1.07 1.835 2.807 1.305 3.492.998.108-.774.418-1.305.762-1.605-2.665-.304-5.466-1.332-5.466-5.93 0-1.31.468-2.38 1.235-3.22-.124-.304-.535-1.526.117-3.176 0 0 1.008-.322 3.3 1.23a11.5 11.5 0 0 1 3.003-.404 11.5 11.5 0 0 1 3.003.404c2.29-1.552 3.297-1.23 3.297-1.23.653 1.65.242 2.872.119 3.176.77.84 1.233 1.91 1.233 3.22 0 4.61-2.804 5.624-5.476 5.922.43.372.814 1.102.814 2.222 0 1.604-.015 2.895-.015 3.286 0 .322.216.7.826.58C20.565 21.797 24 17.298 24 12 24 5.37 18.63 0 12 0Z" />
29 |             </svg>
30 |           </Link>
31 |         </div>
32 |       </div>
33 |     </footer>
34 |   )
35 | }
36 | 


--------------------------------------------------------------------------------
/web/apps/web/next.config.ts:
--------------------------------------------------------------------------------
 1 | import type { NextConfig } from 'next'
 2 | 
 3 | const DOCS_ORIGIN =
 4 |   process.env.DOCS_ORIGIN ??
 5 |   'https://flashinfer-bench.mintlify.app'
 6 | 
 7 | const nextConfig: NextConfig = {
 8 |   transpilePackages: [
 9 |     '@flashinfer-bench/ui',
10 |     '@flashinfer-bench/utils',
11 |     '@flashinfer-bench/config',
12 |   ],
13 |   async rewrites() {
14 |     return [
15 |       // Sphinx documentation
16 |       { source: '/docs/api/python',         destination: '/docs/api/python/index.html' },
17 |       // Mintlify documentation
18 |       { source: '/docs', destination: `${DOCS_ORIGIN}/docs` },
19 |       { source: '/docs/:path*', destination: `${DOCS_ORIGIN}/docs/:path*` },
20 |       // Mintlify assets
21 |       { source: '/mintlify-assets/:path*', destination: `${DOCS_ORIGIN}/mintlify-assets/:path*` },
22 |       { source: '/_mintlify/:path*', destination: `${DOCS_ORIGIN}/_mintlify/:path*` },
23 |       // Mintlify next assets
24 |       {
25 |         source: '/_next/static/:path*',
26 |         has: [
27 |           {
28 |             type: 'header',
29 |             key: 'referer',
30 |             value: 'https?://[^/]+/docs(?:/.*)?',
31 |           },
32 |         ],
33 |         destination: `${DOCS_ORIGIN}/_next/static/:path*`,
34 |       },
35 |       {
36 |         source: '/_next/image/:path*',
37 |         has: [
38 |           {
39 |             type: 'header',
40 |             key: 'referer',
41 |             value: 'https?://[^/]+/docs(?:/.*)?',
42 |           },
43 |         ],
44 |         destination: `${DOCS_ORIGIN}/_next/image/:path*`,
45 |       },
46 |     ]
47 |   },
48 |   async headers() {
49 |     return [
50 |       {
51 |         source: '/docs/api/python/:all*(css|js|png|jpg|gif|svg|ico|woff|woff2)',
52 |         headers: [{ key: 'Cache-Control', value: 'public, max-age=31536000, immutable' }],
53 |       },
54 |       {
55 |         source: '/docs/api/python/:path*',
56 |         headers: [{ key: 'Cache-Control', value: 'public, max-age=60' }],
57 |       },
58 |       {
59 |         source: '/:path*',
60 |         headers: [
61 |           { key: 'X-DNS-Prefetch-Control', value: 'on' },
62 |           { key: 'X-Frame-Options', value: 'SAMEORIGIN' },
63 |         ],
64 |       },
65 |     ]
66 |   },
67 | }
68 | 
69 | export default nextConfig
70 | 


--------------------------------------------------------------------------------
/web/apps/web/app/models/[id]/model-tabs.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import dynamic from "next/dynamic"
 4 | import { Model } from "@/lib/schemas"
 5 | import { Card, CardContent, CardHeader, CardTitle } from "@flashinfer-bench/ui"
 6 | 
 7 | // Dynamic import for client-side only rendering
 8 | const ModelArchWrapper = dynamic(
 9 |   () => import("./arch-overview").then(mod => mod.ModelArchWrapper),
10 |   {
11 |     ssr: false,
12 |     loading: () => <div className="h-[800px] flex items-center justify-center">Loading visualization...</div>
13 |   }
14 | )
15 | 
16 | export function ModelTabs({ model }: { model: Model }) {
17 |   return (
18 |     <div className="space-y-8">
19 |       <div>
20 |         <h2 className="text-2xl font-semibold mb-4">Architecture Overview</h2>
21 |         <ModelArchWrapper model={model} />
22 |       </div>
23 | 
24 |       <Card>
25 |         <CardHeader>
26 |           <CardTitle>Architecture Summary</CardTitle>
27 |         </CardHeader>
28 |         <CardContent>
29 |           <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
30 |             <div>
31 |               <p className="text-2xl font-bold">
32 |                 {Object.keys(model.modules).length}
33 |               </p>
34 |               <p className="text-sm text-muted-foreground">Total Modules</p>
35 |             </div>
36 |             <div>
37 |               <p className="text-2xl font-bold">
38 |                 {Object.values(model.modules).filter(m => m.type === "block").length}
39 |               </p>
40 |               <p className="text-sm text-muted-foreground">Blocks</p>
41 |             </div>
42 |             <div>
43 |               <p className="text-2xl font-bold">
44 |                 {Object.values(model.modules).filter(m => m.type === "layer").length}
45 |               </p>
46 |               <p className="text-sm text-muted-foreground">Kernels</p>
47 |             </div>
48 |             <div>
49 |               <p className="text-2xl font-bold">
50 |                 {Object.values(model.modules).filter(m => m.type === "layer" && (m.definitions?.length ?? 0) > 0).length}
51 |               </p>
52 |               <p className="text-sm text-muted-foreground">Traced Kernels</p>
53 |             </div>
54 |           </div>
55 |         </CardContent>
56 |       </Card>
57 |     </div>
58 |   )
59 | }
60 | 


--------------------------------------------------------------------------------
/docs/op_type_schema/mla_paged.md:
--------------------------------------------------------------------------------
 1 | # mla_paged
 2 | 
 3 |  Multi-head Latent Attention (MLA) with paged memory layout. MLA is an advanced attention mechanism that decomposes the key-value representation into separate compressed key-value (CKV) and key positional encoding (KPE) components to reduce memory usage while maintaining model performance. The paged layout enables efficient memory management for variable-length sequences.
 4 | 
 5 | Variants:
 6 | - prefill
 7 | - decode
 8 | 
 9 | ## prefill
10 | 
11 | Axes (8 dimensions):
12 | - `total_q`, `num_pages`, `len_indptr`, `num_kv_indices`: variable
13 | - `num_qo_heads`, `head_dim_ckv`, `head_dim_kpe`, `page_size`: constant
14 | 
15 | Inputs (7 tensors + 1 scalar):
16 | - `q_nope`: query tensor without positional encoding [total_q, num_qo_heads, head_dim_ckv]
17 | - `q_pe`: query positional encoding component [total_q, num_qo_heads, head_dim_kpe]
18 | - `ckv_cache`: compressed key-value cache [num_pages, page_size, head_dim_ckv]
19 | - `kpe_cache`: key positional encoding cache [num_pages, page_size, head_dim_kpe]
20 | - `qo_indptr`, `kv_indptr`, `kv_indices`: paging indices
21 | - `sm_scale`: softmax scale (scalar)
22 | 
23 | Outputs (2 tensors):
24 | - `output`: attention output [total_q, num_qo_heads, head_dim_ckv]
25 | - `lse`: log-sum-exp values [total_q, num_qo_heads]
26 | 
27 | Constraints:
28 | - `total_q == qo_indptr[-1]`
29 | - `num_kv_indices = kv_indptr[-1]`
30 | 
31 | ## decode
32 | 
33 | Axes (8 dimensions):
34 | - `batch_size`, `num_pages`, `len_indptr`, `num_kv_indices`: variable
35 | - `num_qo_heads`, `head_dim_ckv`, `head_dim_kpe`, `page_size`: constant
36 | 
37 | Inputs (6 tensors + 1 scalar):
38 | - `q_nope`: query tensor without positional encoding [batch_size, num_qo_heads, head_dim_ckv]
39 | - `q_pe`: query positional encoding [batch_size, num_qo_heads, head_dim_kpe]
40 | - `ckv_cache`: compressed key-value cache [num_pages, page_size, head_dim_ckv]
41 | - `kpe_cache`: key positional encoding cache [num_pages, page_size, head_dim_kpe]
42 | - `kv_indptr`, `kv_indices`: paging indices
43 | - `sm_scale`: softmax scale (scalar)
44 | 
45 | Outputs (2 tensors):
46 | - `output`: attention output [batch_size, num_qo_heads, head_dim_ckv]
47 | - `lse`: log-sum-exp values [batch_size, num_qo_heads]
48 | 
49 | Constraints:
50 | - `len_indptr = batch_size + 1`
51 | - `num_kv_indices = kv_indptr[-1]`
52 | 


--------------------------------------------------------------------------------
/flashinfer_bench/data/workload.py:
--------------------------------------------------------------------------------
 1 | """Specification for workloads, which defines the input tensors for a kernel."""
 2 | 
 3 | from typing import Dict, Literal, Union
 4 | 
 5 | from .utils import BaseModelWithDocstrings, NonEmptyString, NonNegativeInt
 6 | 
 7 | 
 8 | class RandomInput(BaseModelWithDocstrings):
 9 |     """Random input generation descriptor.
10 | 
11 |     Represents a specification for generating random tensor input data
12 |     during workload execution and benchmarking.
13 |     """
14 | 
15 |     type: Literal["random"] = "random"
16 |     """The input type identifier for random data generation."""
17 | 
18 | 
19 | class ScalarInput(BaseModelWithDocstrings):
20 |     """Scalar literal input specification.
21 | 
22 |     Represents a scalar value (integer, float, or boolean) that will be
23 |     used as a direct input parameter to the computational workload.
24 |     """
25 | 
26 |     type: Literal["scalar"] = "scalar"
27 |     """The input type identifier for scalar values."""
28 |     value: Union[int, float, bool]
29 |     """The scalar value to be used as input. Must be int, float, or bool."""
30 | 
31 | 
32 | class SafetensorsInput(BaseModelWithDocstrings):
33 |     """Input specification for data loaded from safetensors files.
34 | 
35 |     Represents tensor data that will be loaded from a safetensors file
36 |     using a specific tensor key within that file.
37 |     """
38 | 
39 |     type: Literal["safetensors"] = "safetensors"
40 |     """The input type identifier for safetensors data."""
41 |     path: NonEmptyString
42 |     """Path to the safetensors file containing the tensor data. The path is relative to the root
43 |     path of the TraceSet."""
44 |     tensor_key: NonEmptyString
45 |     """Key identifier for the specific tensor within the safetensors file."""
46 | 
47 | 
48 | InputSpec = Union[RandomInput, SafetensorsInput, ScalarInput]
49 | """Union type representing all possible input specification types."""
50 | 
51 | 
52 | class Workload(BaseModelWithDocstrings):
53 |     """Concrete workload configuration for benchmarking.
54 | 
55 |     Defines a specific instance of a computational workload with concrete
56 |     values for all variable axes and specifications for all input data.
57 |     This represents an executable configuration that can be benchmarked.
58 |     """
59 | 
60 |     axes: Dict[str, NonNegativeInt]
61 |     """Dictionary mapping axis names to their concrete integer values. All values must be
62 |     non-negative, as indicated by the ``NonNegativeInt`` field type."""
63 |     inputs: Dict[str, InputSpec]
64 |     """Dictionary mapping input names to their data specifications."""
65 |     uuid: NonEmptyString
66 |     """Unique identifier for this specific workload configuration."""
67 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/site-header.tsx:
--------------------------------------------------------------------------------
 1 | import Link from "next/link"
 2 | import type { ReactNode } from "react"
 3 | 
 4 | import { cn } from "@flashinfer-bench/utils"
 5 | 
 6 | import { Logo } from "../brand/Logo"
 7 | 
 8 | export interface SiteHeaderNavItem {
 9 |   href: string
10 |   label: string
11 |   external?: boolean
12 | }
13 | 
14 | export interface SiteHeaderProps {
15 |   className?: string
16 |   navItems?: SiteHeaderNavItem[]
17 |   searchSlot?: ReactNode
18 |   rightSlot?: ReactNode
19 |   logoHref?: string
20 |   logoHeight?: number
21 | }
22 | 
/**
 * Sticky top-of-page header.
 *
 * Renders the logo link on the left and, right-aligned, the optional search
 * slot, the navigation links and the optional right-hand slot (in that order).
 * Sections whose input is missing or empty are not rendered at all.
 */
export function SiteHeader(props: SiteHeaderProps) {
  const {
    className,
    navItems = [],
    searchSlot,
    rightSlot,
    logoHref = "/",
    logoHeight = 40,
  } = props

  const linkClassName =
    "text-sm font-medium text-muted-foreground transition-colors hover:text-foreground"

  // External entries use a plain anchor; everything else goes through next/link.
  const navLinks = navItems.map((item) =>
    item.external ? (
      <a key={item.href} href={item.href} className={linkClassName}>
        {item.label}
      </a>
    ) : (
      <Link key={item.href} href={item.href} className={linkClassName}>
        {item.label}
      </Link>
    ),
  )

  const headerClassName = cn(
    "sticky top-0 z-50 w-full border-b border-border/60 bg-background/95 backdrop-blur",
    "supports-[backdrop-filter]:bg-background/60",
    className,
  )

  return (
    <header className={headerClassName}>
      <div className="mx-auto flex h-16 w-full max-w-[1400px] items-center px-4 md:px-6">
        <Link
          href={logoHref}
          className="flex shrink-0 items-center"
          aria-label="FlashInfer Bench"
        >
          <Logo height={logoHeight} />
        </Link>

        <div className="flex flex-1 items-center justify-end gap-4 md:gap-6">
          {searchSlot ? (
            <div className="hidden items-center md:flex md:min-w-[240px] md:max-w-[320px]">
              {searchSlot}
            </div>
          ) : null}

          {navLinks.length > 0 ? (
            <nav className="flex items-center gap-4 md:gap-6">{navLinks}</nav>
          ) : null}

          {rightSlot ? (
            <div className="flex items-center gap-3">{rightSlot}</div>
          ) : null}
        </div>
      </div>
    </header>
  )
}
87 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/toaster.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | // Simple toast implementation
 4 | import { useState, useEffect } from "react"
 5 | 
/** Shape of a single notification held in the module-level toast store. */
interface Toast {
  /** Identifier used as the React key and to remove the toast again. */
  id: string
  /** Optional heading line. */
  title?: string
  /** Optional body text rendered below the title. */
  description?: string
  /** Visual style; "destructive" switches the toast to the destructive color classes. */
  variant?: "default" | "destructive"
}
12 | 
// Module-level store: the current toast list and every subscriber to notify.
let toasts: Toast[] = []
let listeners: ((toasts: Toast[]) => void)[] = []

/** Hand the current toast list to every registered listener, in registration order. */
function emitChange() {
  const subscribers = listeners
  const count = subscribers.length
  for (let index = 0; index < count; index += 1) {
    subscribers[index](toasts)
  }
}
19 | 
/**
 * Add a toast to the store and notify subscribers.
 *
 * The toast receives a short random id and is removed again after 5 seconds.
 * `variant` falls back to "default" when omitted.
 */
export function toast({ title, description, variant = "default" }: Omit<Toast, "id">) {
  const id = Math.random().toString(36).substring(2, 9)
  const entry: Toast = { id, title, description, variant }

  // Always replace the array instead of mutating it so listeners see a new reference.
  toasts = toasts.concat(entry)
  emitChange()

  const expire = () => {
    toasts = toasts.filter((candidate) => candidate.id !== id)
    emitChange()
  }
  setTimeout(expire, 5000)
}
30 | 
/**
 * React hook that mirrors the module-level toast store into component state.
 *
 * Returns the current toast list, the `toast` creator and a `dismiss(id)`
 * helper that removes a toast from the store and notifies all subscribers.
 */
export function useToast() {
  // Start empty (matches what the server rendered); the effect below catches up.
  const [toastList, setToastList] = useState<Toast[]>([])

  useEffect(() => {
    listeners.push(setToastList)

    // Toasts created before this component subscribed would otherwise stay
    // invisible until the next store change, so sync once on subscribe.
    if (toasts.length > 0) {
      setToastList(toasts)
    }

    return () => {
      listeners = listeners.filter(l => l !== setToastList)
    }
  }, [])

  return {
    toasts: toastList,
    toast,
    dismiss: (id: string) => {
      toasts = toasts.filter(t => t.id !== id)
      emitChange()
    }
  }
}
50 | 
/**
 * Renders every active toast in a fixed stack at the bottom-right of the viewport.
 *
 * Each toast is exposed as a live region (`alert` for destructive toasts,
 * `status` otherwise) and carries a labelled dismiss button.
 */
export function Toaster() {
  const { toasts, dismiss } = useToast()

  return (
    <div className="fixed bottom-0 right-0 z-100 flex max-h-screen w-full flex-col-reverse p-4 sm:bottom-0 sm:right-0 sm:top-auto sm:flex-col md:max-w-[420px]">
      {/* `item` (not `toast`) so the module-level toast() function is not shadowed. */}
      {toasts.map((item) => {
        const isDestructive = item.variant === "destructive"
        return (
          <div
            key={item.id}
            role={isDestructive ? "alert" : "status"}
            className={`group pointer-events-auto relative flex w-full items-center justify-between space-x-4 overflow-hidden rounded-md border p-6 pr-8 shadow-lg transition-all ${
              isDestructive
                ? "border-destructive bg-destructive text-destructive-foreground"
                : "border bg-background text-foreground"
            }`}
          >
            <div className="grid gap-1">
              {item.title && (
                <div className="text-sm font-semibold">{item.title}</div>
              )}
              {item.description && (
                <div className="text-sm opacity-90">{item.description}</div>
              )}
            </div>
            {/* type="button" keeps the control from submitting an enclosing form;
                the aria-label gives the "×" glyph a meaningful accessible name. */}
            <button
              type="button"
              aria-label="Dismiss notification"
              onClick={() => dismiss(item.id)}
              className="absolute right-2 top-2 rounded-md p-1 text-foreground/50 opacity-0 transition-opacity hover:text-foreground focus:opacity-100 focus:outline-hidden focus:ring-2 group-hover:opacity-100"
            >
              ×
            </button>
          </div>
        )
      })}
    </div>
  )
}
84 | 


--------------------------------------------------------------------------------
/web/apps/web/app/kernels/[name]/header.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import { useState } from "react"
 4 | import Link from "next/link"
 5 | import { Button } from "@flashinfer-bench/ui"
 6 | import { Copy, Check, ArrowLeft } from "lucide-react"
 7 | import { Definition } from "@/lib/schemas"
 8 | 
 9 | export function DefinitionHeader({
10 |   definition,
11 |   solutionsCount,
12 | }: {
13 |   definition: Definition
14 |   solutionsCount: number
15 | }) {
16 |   const [copiedItem, setCopiedItem] = useState<string | null>(null)
17 | 
18 |   const copyToClipboard = async (text: string, type: string) => {
19 |     try {
20 |       await navigator.clipboard.writeText(text)
21 |       setCopiedItem(type)
22 |       setTimeout(() => setCopiedItem(null), 2000)
23 |     } catch (err) {
24 |       console.error("Failed to copy:", err)
25 |     }
26 |   }
27 | 
28 |   const copyJSON = () => {
29 |     const orderedDefinition: any = {}
30 |     Object.keys(definition).forEach((key) => {
31 |       orderedDefinition[key] = (definition as any)[key]
32 |     })
33 |     copyToClipboard(JSON.stringify(orderedDefinition, null, 2), "json")
34 |   }
35 | 
36 |   return (
37 |     <div className="sticky top-14 z-40 bg-background/95 backdrop-blur supports-[backdrop-filter]:bg-background/60 border-b">
38 |       <div className="container py-3">
39 |         <div className="flex items-center justify-between">
40 |           <div className="flex items-center gap-4">
41 |             <Link href="/" className="text-sm text-muted-foreground hover:text-foreground">
42 |               <ArrowLeft className="h-4 w-4" />
43 |             </Link>
44 |             <div className="flex flex-col">
45 |               <h1 className="text-xl font-mono font-bold">{definition.name}</h1>
46 |               {definition.op_type ? (
47 |                 <span className="text-xs uppercase tracking-wide text-muted-foreground">
48 |                   {definition.op_type}
49 |                 </span>
50 |               ) : null}
51 |             </div>
52 |           </div>
53 |           <div className="flex items-center gap-2 text-sm">
54 |             <Button variant="ghost" size="sm" onClick={copyJSON}>
55 |               {copiedItem === "json" ? (
56 |                 <>
57 |                   <Check className="h-3 w-3 mr-1" />
58 |                   Copied
59 |                 </>
60 |               ) : (
61 |                 <>
62 |                   <Copy className="h-3 w-3 mr-1" />
63 |                   Copy JSON
64 |                 </>
65 |               )}
66 |             </Button>
67 |             <span className="text-muted-foreground">·</span>
68 |             <a href="#solutions" className="hover:underline">
69 |               Solutions ({solutionsCount})
70 |             </a>
71 |           </div>
72 |         </div>
73 |       </div>
74 |     </div>
75 |   )
76 | }
77 | 


--------------------------------------------------------------------------------
/flashinfer_bench/compile/builders/triton_builder.py:
--------------------------------------------------------------------------------
 1 | """Builder for Triton GPU kernels."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import ClassVar
 6 | 
 7 | from flashinfer_bench.compile.builder import Builder
 8 | from flashinfer_bench.compile.runnable import Runnable
 9 | from flashinfer_bench.data import Definition, Solution, SupportedLanguages
10 | 
11 | from .python_builder import PythonBuilder
12 | 
13 | 
class TritonBuilder(PythonBuilder):
    """Builder that turns Triton solutions into runnables.

    Triton kernels are written in Python, so the actual build is inherited from
    ``PythonBuilder``. This subclass only supplies its own package prefix and
    build directory name and tags the resulting metadata as ``"triton"``.
    """

    _PACKAGE_PREFIX: ClassVar[str] = "fib_triton_"
    """Prefix for cache keys to distinguish Triton solutions from pure Python ones."""

    _BUILD_DIR_NAME: ClassVar[str] = "triton"
    """Subdirectory under FIB_CACHE_PATH where build results are stored"""

    def __init__(self) -> None:
        # Calls Builder.__init__ directly with the Triton prefix and directory;
        # PythonBuilder.__init__ is not invoked.
        Builder.__init__(self, self._PACKAGE_PREFIX, self._BUILD_DIR_NAME)

    @staticmethod
    def is_available() -> bool:
        """Report whether the ``triton`` package can be imported.

        Returns
        -------
        bool
            True if ``import triton`` succeeds, False if it raises ImportError.
        """
        try:
            import triton  # noqa: F401  (imported only to probe availability)
        except ImportError:
            return False
        else:
            return True

    def can_build(self, solution: Solution) -> bool:
        """Tell whether ``solution`` is a Triton solution.

        Parameters
        ----------
        solution : Solution
            Solution whose ``spec.language`` is inspected.

        Returns
        -------
        bool
            True if the solution language equals ``SupportedLanguages.TRITON``.
        """
        language = solution.spec.language
        return language == SupportedLanguages.TRITON

    def build(self, definition: Definition, solution: Solution) -> Runnable:
        """Build a Triton solution via ``PythonBuilder.build``.

        Parameters
        ----------
        definition : Definition
            The problem definition.
        solution : Solution
            The Triton solution to build.

        Returns
        -------
        Runnable
            The runnable produced by the parent builder, with
            ``metadata.build_type`` overwritten to ``"triton"``.
        """
        runnable = super().build(definition, solution)
        runnable.metadata.build_type = "triton"
        return runnable
83 | 


--------------------------------------------------------------------------------
/web/apps/web/app/page.tsx:
--------------------------------------------------------------------------------
 1 | import { Suspense } from "react"
 2 | import { LeaderboardSection } from "@/app/leaderboard/section"
 3 | import { ModelsSection } from "@/app/models"
 4 | import { getAllDefinitions, getAllModels, getSolutionsForDefinition, getTracesForDefinition } from "@/lib/data-loader"
 5 | import type { BaselineConfig } from "@/lib/analytics"
 6 | import baselinesData from "@/data/baselines"
 7 | import { KernelsSection } from "./kernels"
 8 | 
 9 | export default async function HomePage() {
10 |   const [allDefinitions, models] = await Promise.all([getAllDefinitions(), getAllModels()])
11 | 
12 |   // Load counts for each definition
13 |   const definitionEntries = await Promise.all(
14 |     allDefinitions.map(async (definition) => {
15 |       const [solutions, traces] = await Promise.all([
16 |         getSolutionsForDefinition(definition.name),
17 |         getTracesForDefinition(definition.name)
18 |       ])
19 | 
20 |       const rawBaseline = (baselinesData as Record<string, Record<string, string> | undefined>)[definition.name]
21 |       const baseline: BaselineConfig | undefined = rawBaseline
22 |         ? {
23 |             default: rawBaseline.default,
24 |             devices: Object.fromEntries(
25 |               Object.entries(rawBaseline).filter(([key]) => key !== "default")
26 |             ),
27 |           }
28 |         : undefined
29 | 
30 |       const baselineNamesSet = new Set<string>()
31 |       if (baseline?.default) baselineNamesSet.add(baseline.default)
32 |       if (baseline?.devices) {
33 |         for (const value of Object.values(baseline.devices)) baselineNamesSet.add(value)
34 |       }
35 | 
36 |       return {
37 |         definition,
38 |         solutions,
39 |         traces,
40 |         solutionCount: solutions.length,
41 |         traceCount: traces.length,
42 |         baseline,
43 |         baselineNames: Array.from(baselineNamesSet),
44 |       }
45 |     })
46 |   )
47 | 
48 |   const definitionsWithCounts = definitionEntries.map(({ definition, solutionCount, traceCount }) => ({
49 |     ...definition,
50 |     solutionCount,
51 |     traceCount,
52 |   }))
53 | 
54 |   const leaderboardEntries = definitionEntries.map((entry) => ({
55 |     definition: entry.definition,
56 |     solutions: entry.solutions,
57 |     traces: entry.traces,
58 |     baseline: entry.baseline,
59 |     baselineNames: entry.baselineNames,
60 |   }))
61 | 
62 |   return (
63 |     <div className="flex flex-col">
64 |       <LeaderboardSection
65 |         entries={leaderboardEntries}
66 |         baselineLabel="Per-definition baselines"
67 |       />
68 | 
69 |       <ModelsSection models={models} />
70 | 
71 |       <Suspense fallback={<div className="container py-12 text-sm text-muted-foreground">Loading kernels…</div>}>
72 |         <KernelsSection definitions={definitionsWithCounts} />
73 |       </Suspense>
74 |     </div>
75 |   )
76 | }
77 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <div align="center" id="top">
 2 | 
 3 | <img src="web/packages/ui/src/brand/fib_logo.png" alt="logo" width="400" margin="10px"></img>
 4 | 
 5 | [![Documentation](https://img.shields.io/badge/docs-latest-green)](https://bench.flashinfer.ai/docs/)
 6 | [![License](https://img.shields.io/badge/license-apache_2-blue)](https://github.com/flashinfer-ai/flashinfer-bench/blob/main/LICENCE)
 7 | [![PyPI](https://img.shields.io/pypi/v/flashinfer-bench)](https://pypi.org/project/flashinfer-bench/)
 8 | 
 9 | **Building the Virtuous Cycle for AI-driven LLM Systems**
10 | 
11 | [Get Started](#get-started) | [Documentation](https://bench.flashinfer.ai/docs/) | [Blogpost](https://flashinfer.ai/2025/10/21/flashinfer-bench.html)
12 | | [Slack (#flashinfer-bench)](https://join.slack.com/t/flashinfer/shared_invite/zt-379wct3hc-D5jR~1ZKQcU00WHsXhgvtA) </div>
13 | 
14 | **FlashInfer-Bench** is a benchmark suite and production workflow designed to build a virtuous cycle of self-improving AI systems.
15 | 
16 | It is part of a broader initiative to build the *virtuous cycle of AI improving AI systems* — enabling AI agents and engineers to collaboratively optimize the very kernels that power large language models.
17 | 
18 | ## Installation
19 | 
20 | Install FlashInfer-Bench with pip:
21 | 
22 | ```bash
23 | pip install flashinfer-bench
24 | ```
25 | 
26 | Import FlashInfer-Bench:
27 | 
28 | ```python
29 | import flashinfer_bench as fib
30 | 
31 | print(fib.__version__)
32 | ```
33 | 
34 | ## Get Started
35 | 
36 | This [guide](https://bench.flashinfer.ai/docs/start/quick_start) shows you how to use the FlashInfer-Bench Python module with the FlashInfer-Trace dataset.
37 | 
38 | ## FlashInfer Trace Dataset
39 | 
40 | We provide an official dataset called **FlashInfer-Trace**, which contains kernels and workloads from real-world AI system deployment environments. FlashInfer-Bench can use this dataset to measure and compare the performance of kernels. The dataset follows the [FlashInfer Trace Schema](https://bench.flashinfer.ai/docs/flashinfer-trace).
41 | 
42 | The official dataset is on HuggingFace: https://huggingface.co/datasets/flashinfer-ai/flashinfer-trace
43 | 
44 | ## Collaborators
45 | 
46 | Our collaborators include:
47 | 
48 | <div align="center">
49 | 
50 | [<img src="https://raw.githubusercontent.com/mlc-ai/XGrammar-web-assets/refs/heads/main/repo/nvidia.svg" height=50/>](https://github.com/NVIDIA/TensorRT-LLM)
51 | &emsp;
52 | [<img src="https://raw.githubusercontent.com/mlc-ai/XGrammar-web-assets/refs/heads/main/repo/gpu_mode.png" height=50/>](https://github.com/gpu-mode)
53 | &emsp;
54 | [<img src="https://raw.githubusercontent.com/mlc-ai/XGrammar-web-assets/refs/heads/main/repo/sglang.png" height=50/>](https://github.com/sgl-project/sglang)
55 | &emsp;
56 | [<img src="https://raw.githubusercontent.com/mlc-ai/XGrammar-web-assets/refs/heads/main/repo/vllm.png" height=50/>](https://github.com/vllm-project/vllm)
57 | &emsp;
58 | [<img src="https://raw.githubusercontent.com/mlc-ai/XGrammar-web-assets/refs/heads/main/repo/bosch.svg" height=50/>](https://www.bosch.com/)
59 | 
60 | </div>
61 | 


--------------------------------------------------------------------------------
/flashinfer_bench/integration/patch_manager.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import importlib
 4 | from dataclasses import dataclass
 5 | from typing import Any, Callable, Dict, Literal, Optional, Tuple
 6 | 
# Allowed values for the ``kind`` field of PatchSpec.
Kind = Literal["function", "method", "callable"]
 8 | 
 9 | 
@dataclass
class PatchSpec:
    """Describes one patch target handed to ``PatchManager.patch``."""

    # Dotted path of the attribute to replace: an importable module prefix
    # followed by the attribute chain (see PatchManager._resolve).
    path: str
    # Category of the target; PatchManager in this file does not read it.
    kind: Kind
    # NOTE(review): presumably a label identifying the patch — confirm with callers.
    name: str
    # NOTE(review): meaning not visible in this file — confirm with the wrapper factories.
    ctx_key: Optional[str] = None
16 | 
17 | 
class PatchManager:
    """Responsible for: resolve target, replace attr, restore original.

    Restoration works on the owner's *namespace entry* rather than on the value
    returned by ``getattr``. This matters for class attributes: ``getattr`` on a
    ``staticmethod``/``classmethod`` yields the unwrapped function / a bound
    method, and writing that back would silently change how the attribute
    behaves on instances. Attributes that were only inherited are restored by
    deleting the override that patching created.
    """

    # Sentinel stored when patching created a new namespace entry on the owner
    # (the attribute was inherited / resolved elsewhere before).
    _ABSENT = object()

    def __init__(self) -> None:
        # (owner_obj, attr_name) -> value to write back on unpatch, or _ABSENT
        # when the override must be deleted instead.
        self._originals: Dict[Tuple[object, str], Any] = {}

    def _resolve(self, path: str) -> Tuple[object, str, Any]:
        """
        Resolve a dotted path to (owner, attr, original_attr).
        Works for module functions or class attributes (methods).

        Raises ImportError when no prefix of ``path`` is importable and
        AttributeError when the path names only a module or an attribute in the
        chain is missing.
        """
        parts = path.split(".")
        # greedily import the longest module prefix
        for i in range(len(parts), 0, -1):
            mod_name = ".".join(parts[:i])
            try:
                mod = importlib.import_module(mod_name)
                owner: object = mod
                rest = parts[i:]
                break
            except Exception:
                # Any import failure just means "try a shorter prefix".
                continue
        else:
            raise ImportError(f"Cannot import any module from '{path}'")

        # descend attributes to find owner of the final attribute
        for j in range(len(rest) - 1):
            owner = getattr(owner, rest[j])

        attr_name = rest[-1] if rest else None
        if attr_name is None:
            raise AttributeError(f"Path '{path}' has no attribute segment")

        original = getattr(owner, attr_name)
        return owner, attr_name, original

    def patch(
        self,
        spec: PatchSpec,
        wrapper_factory: Callable[[PatchSpec, Callable[..., Any]], Callable[..., Any]],
    ) -> bool:
        """Install a wrapper on target; return True if success, False if target missing.

        ``wrapper_factory`` receives the spec and the original attribute (as
        returned by ``getattr``) and must return the replacement. Patching an
        already patched target is a no-op that returns True.
        """
        try:
            owner, attr, original = self._resolve(spec.path)
        except Exception:
            return False  # target not present in this env; silently ignore

        key = (owner, attr)
        if key in self._originals:
            return True  # already patched (idempotent)

        # Live view of the owner's own namespace (module dict, class mappingproxy,
        # instance dict); empty for objects without __dict__.
        namespace = getattr(owner, "__dict__", {})
        had_own_entry = attr in namespace
        # Raw entry keeps staticmethod/classmethod wrappers intact for restore.
        restore_value = namespace[attr] if had_own_entry else original

        wrapper = wrapper_factory(spec, original)
        setattr(owner, attr, wrapper)

        if not had_own_entry and attr in namespace:
            # setattr created a fresh override of an inherited attribute;
            # undoing the patch means removing that override again.
            restore_value = self._ABSENT

        self._originals[key] = restore_value
        return True

    def unpatch_all(self) -> None:
        """Restore all originals.

        Best effort: a failure to restore one target does not prevent the
        others from being restored. All bookkeeping is cleared afterwards.
        """
        for (owner, attr), restore_value in list(self._originals.items()):
            try:
                if restore_value is self._ABSENT:
                    delattr(owner, attr)
                else:
                    setattr(owner, attr, restore_value)
            except Exception:
                # Deliberately ignored so one broken target cannot block the rest.
                pass
        self._originals.clear()
83 | 
84 | 
# Shared module-level PatchManager instance, created when this module is imported.
_manager = PatchManager()


def get_manager() -> PatchManager:
    """Return the shared module-level ``PatchManager`` instance."""
    return _manager
90 | 


--------------------------------------------------------------------------------
/web/apps/web/app/kernels/[name]/page.tsx:
--------------------------------------------------------------------------------
 1 | import { Suspense } from "react"
 2 | import { notFound } from "next/navigation"
 3 | import { getDefinition, getSolutionsForDefinition, getTracesForDefinition, getAllDefinitions } from "@/lib/data-loader"
 4 | import { computeCorrectnessSummaryForSolutions, computeFastPCurvesForSolutions, type BaselineConfig } from "@/lib/analytics"
 5 | import baselinesData from "@/data/baselines"
 6 | import { DefinitionHeader } from "./header"
 7 | import { AxesSignatureSection } from "./axes-sig"
 8 | import { ConstraintsSection } from "./constraints"
 9 | import { DefinitionReference } from "./reference"
10 | import { SolutionsSection } from "./solutions"
11 | 
/** Produce one `{ name }` route parameter object per known definition. */
export async function generateStaticParams() {
  const definitions = await getAllDefinitions()
  return definitions.map(({ name }) => ({ name }))
}
18 | 
/**
 * Detail page for a single kernel definition (server component).
 *
 * Resolves the definition named by the route parameter (404 when it does not
 * exist), loads its solutions and traces, precomputes the correctness summary
 * and fast-p curves, and renders the definition sections.
 */
export default async function TraceDetailPage({
  params
}: {
  params: Promise<{ name: string }>
}) {
  const { name } = await params
  const definition = await getDefinition(name)

  if (!definition) {
    notFound()
  }

  const [solutions, traces] = await Promise.all([
    getSolutionsForDefinition(definition.name),
    getTracesForDefinition(definition.name)
  ])

  // Split the raw baseline record into its "default" entry and per-device entries.
  const baselineTable = baselinesData as Record<string, Record<string, string> | undefined>
  const baselineConfig = baselineTable[definition.name]
  let baseline: BaselineConfig | undefined
  if (baselineConfig) {
    const { default: defaultName, ...devices } = baselineConfig
    baseline = { default: defaultName, devices }
  }

  const correctness = computeCorrectnessSummaryForSolutions(traces, solutions)
  const fastP = computeFastPCurvesForSolutions({
    traces,
    solutions,
    baseline,
    sampleCount: 300,
  })

  const precomputed = {
    curves: fastP.curves,
    correctness,
    nWorkloads: fastP.nWorkloads,
  }

  const solutionsFallback = (
    <div className="py-8 text-sm text-muted-foreground">Loading solutions…</div>
  )

  return (
    <div className="relative">
      <DefinitionHeader
        definition={definition}
        solutionsCount={solutions.length}
      />

      <div className="container py-8">
        <div className="space-y-8">
          <p className="text-muted-foreground">{definition.description}</p>

          <AxesSignatureSection definition={definition} />

          <ConstraintsSection definition={definition} />

          <section id="reference">
            <h2 className="text-2xl font-semibold mb-4">Reference Implementation</h2>
            <DefinitionReference definition={definition} />
          </section>

          <Suspense fallback={solutionsFallback}>
            <SolutionsSection
              definition={definition}
              solutions={solutions}
              traces={traces}
              precomputed={precomputed}
            />
          </Suspense>
        </div>
      </div>
    </div>
  )
}
91 | 


--------------------------------------------------------------------------------
/flashinfer_bench/apply/key.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from abc import ABC, abstractmethod
 4 | from typing import Any, Dict, Tuple, Type, Union
 5 | 
 6 | from pydantic import BaseModel, ConfigDict
 7 | 
 8 | from flashinfer_bench.data import Definition, Workload
 9 | 
10 | 
class ApplyKey(BaseModel):
    """Key for matching workloads to solutions in apply runtime.

    This is an immutable (frozen) model that can be used as dict keys or in sets.
    Both fields default to an empty tuple.
    """

    # frozen=True is what makes instances immutable and usable as dict/set keys.
    model_config = ConfigDict(frozen=True)

    axes: Tuple[Tuple[str, int], ...] = ()
    """Variable axis values as sorted (name, value) tuples."""
    feats: Tuple[Tuple[str, Union[int, float, bool]], ...] = ()
    """Additional features extracted from input tensors."""
23 | 
24 | 
class ApplyKeyBuilder(ABC):
    """Abstract base for objects that derive an ``ApplyKey`` for one definition.

    Subclasses implement key construction from runtime arguments and from a
    recorded workload, plus feature extraction from runtime arguments.
    """

    def __init__(self, definition: Definition) -> None:
        # Stored so subclasses can consult the definition when building keys.
        self.definition = definition

    @abstractmethod
    def build_from_args(self, args: Tuple[Any, ...]) -> ApplyKey:
        """Build a key from positional runtime arguments (inputs only)"""
        ...

    @abstractmethod
    def build_from_workload(self, workload: Workload) -> ApplyKey:
        """Build a key from offline workload trace"""
        ...

    @abstractmethod
    def features(self, args: Tuple[Any, ...]) -> Tuple[Tuple[str, Any], ...]:
        """Lightweight feature extraction from input args"""
        ...
43 | 
44 | 
45 | # Key Builders
46 | 
47 | 
class AxesOnlyKeyBuilder(ApplyKeyBuilder):
    """Key builder whose keys consist solely of the sorted axis values."""

    def build_from_args(self, args: Tuple[Any, ...]) -> ApplyKey:
        """Derive the axis values from the runtime inputs and build a key from them."""
        axis_values = self.definition.get_axes_values_from_inputs(args)
        ordered_axes = tuple(sorted(axis_values.items()))
        return ApplyKey(axes=ordered_axes)

    def build_from_workload(self, workload: Workload) -> ApplyKey:
        """Build a key from the axis values recorded on ``workload``."""
        ordered_axes = tuple(sorted(workload.axes.items()))
        return ApplyKey(axes=ordered_axes)

    def features(self, args: Tuple[Any, ...]) -> Tuple[Tuple[str, Any], ...]:
        """Return no features; this builder keys on axes only."""
        return tuple()
59 | 
60 | 
# TODO(shanli): add more feature specific keys (e.g. avg_seq_len)
class GEMMKeyBuilder(AxesOnlyKeyBuilder):
    """Key builder registered for the "gemm" op type; currently identical to its base."""

    pass
64 | 
65 | 
class GQAKeyBuilder(AxesOnlyKeyBuilder):
    """Key builder registered for the "gqa" op type; currently identical to its base."""

    pass
68 | 
69 | 
class MLAKeyBuilder(AxesOnlyKeyBuilder):
    """Key builder registered for the "mla" op type; currently identical to its base."""

    pass
72 | 
73 | 
class ApplyKeyFactory:
    """Registry that maps an op-type name to the key-builder class used for it."""

    # Shared by all users of the factory; filled through register().
    _REGISTRY: Dict[str, Type[ApplyKeyBuilder]] = {}

    @classmethod
    def register(cls, type_name: str, builder_cls: Type[ApplyKeyBuilder]) -> None:
        """Associate ``builder_cls`` with ``type_name``, replacing any earlier entry."""
        cls._REGISTRY.update({type_name: builder_cls})

    @classmethod
    def for_type(cls, type_name: str) -> Type[ApplyKeyBuilder]:
        """Return the builder class for ``type_name``.

        Unregistered names fall back to ``AxesOnlyKeyBuilder``.
        """
        try:
            return cls._REGISTRY[type_name]
        except KeyError:
            return AxesOnlyKeyBuilder

    @classmethod
    def specialize(cls, definition: Definition) -> ApplyKeyBuilder:
        """Instantiate the builder selected by ``definition.op_type`` for ``definition``."""
        selected_cls = cls.for_type(definition.op_type)
        builder = selected_cls(definition)
        return builder
90 | 
91 | 
92 | ApplyKeyFactory.register("gemm", GEMMKeyBuilder)
93 | ApplyKeyFactory.register("gqa", GQAKeyBuilder)
94 | ApplyKeyFactory.register("mla", MLAKeyBuilder)
95 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/config.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from dataclasses import dataclass, field
 4 | from typing import List, Optional
 5 | 
 6 | 
 7 | @dataclass
 8 | class BenchmarkConfig:
 9 |     """Configuration for benchmark runs.
10 | 
11 |     All fields have default values to make configuration optional.
12 |     """
13 | 
14 |     warmup_runs: int = field(default=10)
15 |     iterations: int = field(default=50)
16 |     num_trials: int = field(default=3)
17 |     rtol: float = field(default=1e-2)
18 |     atol: float = field(default=1e-2)
19 |     log_dir: str = field(default="/tmp/flashinfer_bench")
20 |     use_isolated_runner: bool = field(default=False)
21 |     required_matched_ratio: Optional[float] = field(default=None)
22 |     sampling_validation_trials: int = field(default=100)
23 |     sampling_tvd_threshold: float = field(default=0.2)
24 |     definitions: Optional[List[str]] = field(default=None)
25 |     solutions: Optional[List[str]] = field(default=None)
26 |     timeout_seconds: int = field(default=300)
27 | 
28 |     def __post_init__(self):
29 |         if self.warmup_runs < 0:
30 |             raise ValueError("warmup_runs must be >= 0")
31 |         if self.iterations <= 0:
32 |             raise ValueError("iterations must be > 0")
33 |         if self.num_trials <= 0:
34 |             raise ValueError("num_trials must be > 0")
35 |         if self.rtol <= 0 or self.atol <= 0:
36 |             raise ValueError("rtol/atol must be > 0")
37 |         if not isinstance(self.rtol, float):
38 |             raise ValueError("rtol must be a float")
39 |         if not isinstance(self.atol, float):
40 |             raise ValueError("atol must be a float")
41 |         if self.required_matched_ratio is not None and not (
42 |             0.0 < self.required_matched_ratio <= 1.0
43 |         ):
44 |             raise ValueError("required_matched_ratio must be between 0 and 1")
45 |         if self.required_matched_ratio is not None and not isinstance(
46 |             self.required_matched_ratio, float
47 |         ):
48 |             raise ValueError("required_matched_ratio must be a float")
49 |         if self.sampling_validation_trials <= 0:
50 |             raise ValueError("sampling_validation_trials must be > 0")
51 |         if not isinstance(self.sampling_validation_trials, int):
52 |             raise ValueError("sampling_validation_trials must be an int")
53 |         if not (0.0 <= self.sampling_tvd_threshold <= 1.0):
54 |             raise ValueError("sampling_tvd_threshold must be between 0 and 1")
55 |         if not isinstance(self.sampling_tvd_threshold, float):
56 |             raise ValueError("sampling_tvd_threshold must be a float")
57 |         if self.timeout_seconds <= 0:
58 |             raise ValueError("timeout_seconds must be > 0")
59 |         if not isinstance(self.timeout_seconds, int):
60 |             raise ValueError("timeout_seconds must be an int")
61 |         if self.definitions is not None and not isinstance(self.definitions, list):
62 |             raise ValueError("definitions must be a list or None")
63 |         if self.solutions is not None and not isinstance(self.solutions, list):
64 |             raise ValueError("solutions must be a list or None")
65 | 


--------------------------------------------------------------------------------
/flashinfer_bench/compile/utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for building solutions."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import re
 6 | from pathlib import Path
 7 | from typing import List
 8 | 
 9 | from flashinfer_bench.data import Solution, SourceFile
10 | 
11 | 
def write_sources_to_path(path: Path, sources: List[SourceFile]) -> List[Path]:
    """Write source files to a directory and return their paths. Create path if not exists.

    Every source file is written below ``path``, creating subdirectories as
    needed. Each file path is checked right before it is written to prevent
    path traversal and absolute path injection; files preceding an offending
    entry have already been written when the error is raised.

    Parameters
    ----------
    path : Path
        The root directory where source files will be written.
    sources : List[SourceFile]
        The list of source files to write. Each file's path must be relative and
        not contain parent directory references ("..").

    Returns
    -------
    List[Path]
        Paths of the written files, each formed as ``path / src.path`` (so they
        are absolute only if ``path`` is absolute), in input order.

    Raises
    ------
    AssertionError
        If any source file has an absolute path or contains path traversal.
    """
    path.mkdir(parents=True, exist_ok=True)
    written: List[Path] = []
    for src in sources:
        # Defensive check: path should be validated at Solution creation time.
        relative = Path(src.path)

        # Raised explicitly instead of via `assert` so the checks are not
        # stripped when Python runs with -O; the exception type is unchanged.
        if relative.is_absolute():
            raise AssertionError(f"Absolute path detected: {src.path}")
        if ".." in relative.parts:
            raise AssertionError(f"Path traversal detected: {src.path}")

        target = path / src.path

        # Ensure parent directories exist
        target.parent.mkdir(parents=True, exist_ok=True)

        # Fixed encoding so non-ASCII sources do not depend on the locale.
        target.write_text(src.content, encoding="utf-8")
        written.append(target)

    return written
56 | 
57 | 
def create_package_name(solution: Solution, package_prefix: str = "") -> str:
    """Derive a package name from a solution's name and content hash.

    The result is the concatenation of:

    1. ``package_prefix`` (typically identifying the builder),
    2. the solution name with every character outside ``[0-9a-zA-Z_]``
       replaced by ``_``, and with a leading ``_`` added when that name is
       empty or starts with a digit,
    3. an underscore followed by the first 6 characters of ``solution.hash()``.

    Parameters
    ----------
    solution : Solution
        The solution to create a package name for.
    package_prefix : str, optional
        Text prepended to the package name. Default is the empty string.

    Returns
    -------
    str
        A package name in the format: {package_prefix}{normalized_name}_{hash}.

    Examples
    --------
    >>> create_package_name(solution, "fib_python_")
    'fib_python_rmsnorm_v1_a3f2b1'
    """
    normalized = re.sub(r"[^0-9a-zA-Z_]", "_", solution.name)

    # Guard against an empty name or one that begins with a digit.
    starts_badly = normalized == "" or normalized[0].isdigit()
    if starts_badly:
        normalized = "_" + normalized

    short_hash = solution.hash()[:6]
    return "".join((package_prefix, normalized, "_", short_hash))
92 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/evaluators/utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for kernel evaluation.
 2 | 
 3 | This module provides helper functions for allocating output tensors and
 4 | normalizing kernel results during evaluation.
 5 | """
 6 | 
 7 | from typing import Any, List
 8 | 
 9 | import torch
10 | 
11 | from flashinfer_bench.data import Definition
12 | 
13 | 
def allocate_outputs(definition: Definition, inputs: List[Any], device: str) -> List[torch.Tensor]:
    """Create empty output tensors sized according to the definition and inputs.

    The definition is asked to derive axis values from ``inputs`` and then to
    turn those values into output shapes; one ``torch.empty`` tensor is created
    per (shape, dtype) pair.

    Parameters
    ----------
    definition : Definition
        The kernel definition specifying output tensor specs.
    inputs : List[Any]
        List of input values (tensors or scalars) in definition order.
    device : str
        The device to allocate tensors on (e.g., "cuda:0").

    Returns
    -------
    List[torch.Tensor]
        Uninitialized output tensors, in the order the definition reports its
        output shapes and dtypes.
    """
    axis_values = definition.get_axes_values_from_inputs(inputs)
    shapes = definition.get_output_shapes(axis_values)
    out_dtypes = definition.torch_output_dtypes

    allocated: List[torch.Tensor] = []
    for shape, dtype in zip(shapes, out_dtypes):
        allocated.append(torch.empty(shape, dtype=dtype, device=device))
    return allocated
42 | 
43 | 
def normalize_result(definition: Definition, result: Any, device: str) -> List[torch.Tensor]:
    """Turn whatever a value-returning kernel produced into a list of tensors.

    Tensors are kept as they are when ``str(tensor.device)`` already equals
    ``device`` and are moved with ``.to(device)`` otherwise; their dtype is not
    changed. Non-tensor values are wrapped with ``torch.tensor`` using the
    dtype the definition reports for that output position.

    Parameters
    ----------
    definition : Definition
        The kernel definition specifying expected outputs.
    result : Any
        The kernel return value. Can be:
        - A single value (int, float, bool, or torch.Tensor)
        - A tuple or list of values
    device : str
        The device to place resulting tensors on.

    Returns
    -------
    List[torch.Tensor]
        List of output tensors in definition order.

    Raises
    ------
    ValueError
        If the number of returned values doesn't match the expected outputs.
    """
    dtypes = definition.torch_output_dtypes
    n_outputs = len(dtypes)

    def _as_tensor(value: Any, dtype: torch.dtype) -> torch.Tensor:
        if not isinstance(value, torch.Tensor):
            return torch.tensor(value, dtype=dtype, device=device)
        # The comparison is on the string form of the device.
        if str(value.device) == device:
            return value
        return value.to(device)

    if not isinstance(result, (tuple, list)):
        # Single value: tensor, int, float, bool
        if n_outputs != 1:
            raise ValueError(f"Single value returned but {n_outputs} outputs expected")
        return [_as_tensor(result, dtypes[0])]

    if len(result) != n_outputs:
        raise ValueError(
            f"Tuple/list has {len(result)} elements but {n_outputs} outputs expected"
        )
    # Lengths are equal here, so pairing by position matches indexing by i.
    return [_as_tensor(item, dtype) for item, dtype in zip(result, dtypes)]
91 | 


--------------------------------------------------------------------------------
/tests/compile/test_triton_builder.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | from pathlib import Path
  3 | 
  4 | import pytest
  5 | import torch
  6 | 
  7 | from flashinfer_bench.compile.builders import TritonBuilder
  8 | from flashinfer_bench.data import (
  9 |     AxisConst,
 10 |     BuildSpec,
 11 |     Definition,
 12 |     Solution,
 13 |     SourceFile,
 14 |     SupportedLanguages,
 15 |     TensorSpec,
 16 | )
 17 | 
 18 | 
 19 | @pytest.fixture(autouse=True)
 20 | def _use_tmp_cache_dir(tmp_cache_dir: Path) -> None:
 21 |     """Automatically use tmp_cache_dir for all tests in this module."""
 22 | 
 23 | 
 24 | @pytest.mark.requires_torch_cuda
 25 | def test_is_available(monkeypatch: pytest.MonkeyPatch) -> None:
 26 |     # Mock the import to make triton unavailable
 27 |     import builtins
 28 | 
 29 |     original_import = builtins.__import__
 30 | 
 31 |     def mock_import(name, *args, **kwargs):
 32 |         if name == "triton":
 33 |             raise ImportError("Mocked: triton not available")
 34 |         return original_import(name, *args, **kwargs)
 35 | 
 36 |     monkeypatch.setattr(builtins, "__import__", mock_import)
 37 | 
 38 |     assert not TritonBuilder.is_available()
 39 | 
 40 | 
 41 | @pytest.mark.requires_torch_cuda
 42 | def test_vector_add():
 43 |     definition = Definition(
 44 |         name="vec_add",
 45 |         op_type="op",
 46 |         axes={"N": AxisConst(value=256)},
 47 |         inputs={
 48 |             "X": TensorSpec(shape=["N"], dtype="float32"),
 49 |             "Y": TensorSpec(shape=["N"], dtype="float32"),
 50 |         },
 51 |         outputs={"Z": TensorSpec(shape=["N"], dtype="float32")},
 52 |         reference="import torch\n\ndef run(X, Y):\n    return X + Y",
 53 |     )
 54 | 
 55 |     triton_code = """
 56 | import torch
 57 | import triton
 58 | import triton.language as tl
 59 | 
 60 | @triton.jit
 61 | def add_kernel(x_ptr, y_ptr, z_ptr, n, BLOCK_SIZE: tl.constexpr):
 62 |     pid = tl.program_id(axis=0)
 63 |     offs = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
 64 |     mask = offs < n
 65 |     x = tl.load(x_ptr + offs, mask=mask)
 66 |     y = tl.load(y_ptr + offs, mask=mask)
 67 |     tl.store(z_ptr + offs, x + y, mask=mask)
 68 | 
 69 | def run(X, Y):
 70 |     n = X.numel()
 71 |     Z = torch.empty_like(X)
 72 |     BLOCK = 128
 73 |     grid = lambda meta: ( (n + meta['BLOCK_SIZE'] - 1) // meta['BLOCK_SIZE'], )
 74 |     add_kernel[grid](X, Y, Z, n, BLOCK_SIZE=BLOCK)
 75 |     return Z
 76 | """
 77 | 
 78 |     solution = Solution(
 79 |         name="triton_vec_add",
 80 |         definition="vec_add",
 81 |         author="tester",
 82 |         spec=BuildSpec(
 83 |             language=SupportedLanguages.TRITON,
 84 |             target_hardware=["cuda"],
 85 |             entry_point="module/main.py::run",
 86 |             destination_passing_style=False,
 87 |         ),
 88 |         sources=[SourceFile(path="module/main.py", content=triton_code)],
 89 |     )
 90 | 
 91 |     builder = TritonBuilder()
 92 |     runnable = builder.build(definition, solution)
 93 |     x_tensor = torch.arange(256, dtype=torch.float32, device="cuda")
 94 |     y_tensor = 2 * torch.ones(256, dtype=torch.float32, device="cuda")
 95 |     z_tensor = runnable(x_tensor, y_tensor)
 96 |     assert torch.allclose(z_tensor, x_tensor + y_tensor)
 97 | 
 98 | 
 99 | if __name__ == "__main__":
100 |     pytest.main(sys.argv)
101 | 


--------------------------------------------------------------------------------
/web/packages/ui/src/components/table.tsx:
--------------------------------------------------------------------------------
  1 | import * as React from "react"
  2 | import { cn } from "@flashinfer-bench/utils"
  3 | 
  4 | const Table = React.forwardRef<
  5 |   HTMLTableElement,
  6 |   React.HTMLAttributes<HTMLTableElement>
  7 | >(({ className, ...props }, ref) => (
  8 |   <div className="relative w-full overflow-auto">
  9 |     <table
 10 |       ref={ref}
 11 |       className={cn("w-full caption-bottom text-sm", className)}
 12 |       {...props}
 13 |     />
 14 |   </div>
 15 | ))
 16 | Table.displayName = "Table"
 17 | 
 18 | const TableHeader = React.forwardRef<
 19 |   HTMLTableSectionElement,
 20 |   React.HTMLAttributes<HTMLTableSectionElement>
 21 | >(({ className, ...props }, ref) => (
 22 |   <thead ref={ref} className={cn("[&_tr]:border-b", className)} {...props} />
 23 | ))
 24 | TableHeader.displayName = "TableHeader"
 25 | 
 26 | const TableBody = React.forwardRef<
 27 |   HTMLTableSectionElement,
 28 |   React.HTMLAttributes<HTMLTableSectionElement>
 29 | >(({ className, ...props }, ref) => (
 30 |   <tbody
 31 |     ref={ref}
 32 |     className={cn("[&_tr:last-child]:border-0", className)}
 33 |     {...props}
 34 |   />
 35 | ))
 36 | TableBody.displayName = "TableBody"
 37 | 
 38 | const TableFooter = React.forwardRef<
 39 |   HTMLTableSectionElement,
 40 |   React.HTMLAttributes<HTMLTableSectionElement>
 41 | >(({ className, ...props }, ref) => (
 42 |   <tfoot
 43 |     ref={ref}
 44 |     className={cn(
 45 |       "border-t bg-muted/50 font-medium [&>tr]:last:border-b-0",
 46 |       className
 47 |     )}
 48 |     {...props}
 49 |   />
 50 | ))
 51 | TableFooter.displayName = "TableFooter"
 52 | 
 53 | const TableRow = React.forwardRef<
 54 |   HTMLTableRowElement,
 55 |   React.HTMLAttributes<HTMLTableRowElement>
 56 | >(({ className, ...props }, ref) => (
 57 |   <tr
 58 |     ref={ref}
 59 |     className={cn(
 60 |       "border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted",
 61 |       className
 62 |     )}
 63 |     {...props}
 64 |   />
 65 | ))
 66 | TableRow.displayName = "TableRow"
 67 | 
 68 | const TableHead = React.forwardRef<
 69 |   HTMLTableCellElement,
 70 |   React.ThHTMLAttributes<HTMLTableCellElement>
 71 | >(({ className, ...props }, ref) => (
 72 |   <th
 73 |     ref={ref}
 74 |     className={cn(
 75 |       "h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0",
 76 |       className
 77 |     )}
 78 |     {...props}
 79 |   />
 80 | ))
 81 | TableHead.displayName = "TableHead"
 82 | 
 83 | const TableCell = React.forwardRef<
 84 |   HTMLTableCellElement,
 85 |   React.TdHTMLAttributes<HTMLTableCellElement>
 86 | >(({ className, ...props }, ref) => (
 87 |   <td
 88 |     ref={ref}
 89 |     className={cn("p-4 align-middle [&:has([role=checkbox])]:pr-0", className)}
 90 |     {...props}
 91 |   />
 92 | ))
 93 | TableCell.displayName = "TableCell"
 94 | 
 95 | const TableCaption = React.forwardRef<
 96 |   HTMLTableCaptionElement,
 97 |   React.HTMLAttributes<HTMLTableCaptionElement>
 98 | >(({ className, ...props }, ref) => (
 99 |   <caption
100 |     ref={ref}
101 |     className={cn("mt-4 text-sm text-muted-foreground", className)}
102 |     {...props}
103 |   />
104 | ))
105 | TableCaption.displayName = "TableCaption"
106 | 
107 | export {
108 |   Table,
109 |   TableHeader,
110 |   TableBody,
111 |   TableFooter,
112 |   TableHead,
113 |   TableRow,
114 |   TableCell,
115 |   TableCaption,
116 | }
117 | 


--------------------------------------------------------------------------------
/flashinfer_bench/bench/evaluators/evaluator.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | from abc import ABC, abstractmethod
  4 | from pathlib import Path
  5 | from typing import Any, List, Optional, Tuple
  6 | 
  7 | import torch
  8 | 
  9 | from flashinfer_bench.bench.config import BenchmarkConfig
 10 | from flashinfer_bench.bench.runner.runner import DeviceBaseline
 11 | from flashinfer_bench.bench.utils import make_eval
 12 | from flashinfer_bench.compile import Runnable
 13 | from flashinfer_bench.data import (
 14 |     Correctness,
 15 |     Definition,
 16 |     Evaluation,
 17 |     EvaluationStatus,
 18 |     Performance,
 19 |     Workload,
 20 | )
 21 | 
 22 | 
 23 | class Evaluator(ABC):
 24 |     @classmethod
 25 |     @abstractmethod
 26 |     def can_evaluate(cls, definition: Definition) -> bool: ...
 27 | 
 28 |     @classmethod
 29 |     @abstractmethod
 30 |     def build_baseline(
 31 |         cls,
 32 |         definition: Definition,
 33 |         workload: Workload,
 34 |         cfg: BenchmarkConfig,
 35 |         device: str,
 36 |         traceset_root: Optional[Path] = None,
 37 |     ) -> DeviceBaseline: ...
 38 | 
 39 |     @classmethod
 40 |     @abstractmethod
 41 |     def check_correctness(
 42 |         cls,
 43 |         definition: Definition,
 44 |         sol_runnable: Runnable,
 45 |         inputs: List[List[Any]],
 46 |         ref_outputs: List[List[torch.Tensor]],
 47 |         cfg: BenchmarkConfig,
 48 |         log_path: str,
 49 |         device: str,
 50 |     ) -> Tuple[Optional[Correctness], Optional[Evaluation]]: ...
 51 | 
 52 |     @classmethod
 53 |     @abstractmethod
 54 |     def eval_performance(
 55 |         cls,
 56 |         definition: Definition,
 57 |         sol_runnable: Runnable,
 58 |         inputs: List[List[Any]],
 59 |         ref_mean_latency_ms: float,
 60 |         cfg: BenchmarkConfig,
 61 |         log_path: str,
 62 |         device: str,
 63 |     ) -> Tuple[Performance, Optional[Evaluation]]: ...
 64 | 
 65 |     @classmethod
 66 |     def evaluate(
 67 |         cls,
 68 |         definition: Definition,
 69 |         sol_runnable: Runnable,
 70 |         inputs: List[List[Any]],
 71 |         ref_outputs: List[List[torch.Tensor]],
 72 |         ref_mean_latency_ms: float,
 73 |         cfg: BenchmarkConfig,
 74 |         log_path: str,
 75 |         device: str,
 76 |     ) -> Evaluation:
 77 |         correctness, evaluation = cls.check_correctness(
 78 |             definition=definition,
 79 |             sol_runnable=sol_runnable,
 80 |             inputs=inputs,
 81 |             ref_outputs=ref_outputs,
 82 |             cfg=cfg,
 83 |             log_path=log_path,
 84 |             device=device,
 85 |         )
 86 |         if evaluation is not None:
 87 |             return evaluation
 88 | 
 89 |         performance, evaluation = cls.eval_performance(
 90 |             definition=definition,
 91 |             sol_runnable=sol_runnable,
 92 |             inputs=inputs,
 93 |             ref_mean_latency_ms=ref_mean_latency_ms,
 94 |             cfg=cfg,
 95 |             log_path=log_path,
 96 |             device=device,
 97 |         )
 98 | 
 99 |         if evaluation is not None:
100 |             return evaluation
101 | 
102 |         return make_eval(
103 |             status=EvaluationStatus.PASSED,
104 |             device=device,
105 |             log_path=log_path,
106 |             correctness=correctness,
107 |             performance=performance,
108 |         )
109 | 


--------------------------------------------------------------------------------
/web/apps/web/data/baselines.ts:
--------------------------------------------------------------------------------
// Baseline lookup table: each top-level key maps to an object whose `default`
// entry is a baseline identifier string.
// `as const` keeps the literal key/value types; `satisfies` checks the shape
// against Record<string, Record<string, string>> without widening them.
// NOTE(review): keys look like kernel definition names and values like solution
// names (a name followed by a short hash suffix) — confirm against the consumer.
// NOTE(review): the `moe_fp8_block_scale_...` entry has no hash suffix, and the
// `*_sampling_from_probs_*` entries use 8-character suffixes where the others
// use 6 — verify these differences are intentional.
const baselines = {
  fused_add_rmsnorm_h2048: {
    default: "flashinfer_wrapper_74a870",
  },
  fused_add_rmsnorm_h4096: {
    default: "flashinfer_wrapper_0ff432",
  },
  fused_add_rmsnorm_h7168: {
    default: "flashinfer_wrapper_5bddf1",
  },
  gemm_n128_k2048: {
    default: "torch_matmul_317103",
  },
  gemm_n2048_k4096: {
    default: "torch_matmul_926adc",
  },
  gemm_n256_k7168: {
    default: "torch_matmul_67278e",
  },
  gemm_n28672_k4096: {
    default: "torch_matmul_655587",
  },
  gemm_n4096_k14336: {
    default: "torch_matmul_254647",
  },
  gemm_n4096_k4096: {
    default: "torch_matmul_0d13df",
  },
  gemm_n5120_k2048: {
    default: "torch_matmul_075b0d",
  },
  gemm_n6144_k4096: {
    default: "torch_matmul_3b6488",
  },
  gqa_paged_decode_h32_kv4_d128_ps1: {
    default: "flashinfer_wrapper_78fd04",
  },
  gqa_paged_decode_h32_kv8_d128_ps1: {
    default: "flashinfer_wrapper_a9588f",
  },
  gqa_paged_prefill_causal_h32_kv4_d128_ps1: {
    default: "flashinfer_wrapper_71bd33",
  },
  gqa_paged_prefill_causal_h32_kv8_d128_ps1: {
    default: "flashinfer_wrapper_8cad92",
  },
  gqa_ragged_prefill_causal_h32_kv4_d128: {
    default: "flashinfer_wrapper_acea60",
  },
  gqa_ragged_prefill_causal_h32_kv8_d128: {
    default: "flashinfer_wrapper_f9a07b",
  },
  mla_paged_decode_h16_ckv512_kpe64_ps1: {
    default: "flashinfer_wrapper_03f7b0",
  },
  mla_paged_prefill_causal_h16_ckv512_kpe64_ps1: {
    default: "flashinfer_wrapper_ea3787",
  },
  moe_fp8_block_scale_ds_routing_topk8_ng8_kg4_e32_h7168_i2048: {
    default: "flashinfer_moe",
  },
  rmsnorm_h128: {
    default: "flashinfer_wrapper_57c111",
  },
  rmsnorm_h1536: {
    default: "flashinfer_wrapper_a27dc7",
  },
  rmsnorm_h2048: {
    default: "flashinfer_wrapper_0af255",
  },
  rmsnorm_h4096: {
    default: "flashinfer_wrapper_2e27cd",
  },
  rmsnorm_h512: {
    default: "flashinfer_wrapper_846dc8",
  },
  rmsnorm_h7168: {
    default: "flashinfer_wrapper_5d67c6",
  },
  top_k_sampling_from_probs_v128256: {
    default: "flashinfer_wrapper_d86b24bd",
  },
  top_k_sampling_from_probs_v129280: {
    default: "flashinfer_wrapper_4ec4ec35",
  },
  top_k_sampling_from_probs_v151936: {
    default: "flashinfer_wrapper_9c1e50fa",
  },
  top_k_top_p_sampling_from_probs_v128256: {
    default: "flashinfer_wrapper_211bdd6e",
  },
  top_k_top_p_sampling_from_probs_v129280: {
    default: "flashinfer_wrapper_a4e1e7cf",
  },
  top_k_top_p_sampling_from_probs_v151936: {
    default: "flashinfer_wrapper_0bb9995b",
  },
  top_p_sampling_from_probs_v128256: {
    default: "flashinfer_wrapper_5df4fa0b",
  },
  top_p_sampling_from_probs_v129280: {
    default: "flashinfer_wrapper_4b28093b",
  },
  top_p_sampling_from_probs_v151936: {
    default: "flashinfer_wrapper_32ca24af",
  },
} as const satisfies Record<string, Record<string, string>>
108 | 
109 | export default baselines
110 | 


--------------------------------------------------------------------------------
/docs/start/quick_start.mdx:
--------------------------------------------------------------------------------
  1 | # Quick Start
  2 | 
  3 | This guide shows you how to use the FlashInfer-Bench Python module with the FlashInfer-Trace dataset.
  4 | The dataset is hosted on Hugging Face: [flashinfer-ai/flashinfer-trace](https://huggingface.co/datasets/flashinfer-ai/flashinfer-trace).
  5 | 
  6 | ## Benchmarking
  7 | 
  8 | ### Via CLI
  9 | 
 10 | Run benchmarks on a local trace dataset:
 11 | 
 12 | ```bash
 13 | flashinfer-bench run --local /path/to/flashinfer-trace
 14 | ```
 15 | 
 16 | #### Custom Options
 17 | 
 18 | ```bash
 19 | # Run with custom configuration
 20 | flashinfer-bench run --local /path/to/flashinfer-trace \
 21 |   --warmup-runs 10 \
 22 |   --iterations 100 \
 23 |   --num-trials 5 \
 24 |   --rtol 1e-3 \
 25 |   --atol 1e-3
 26 | 
 27 | # Run specific definitions or solutions
 28 | flashinfer-bench run --local /path/to/flashinfer-trace \
 29 |   --definitions gemm_n5120_k2048 rmsnorm_h128 \
 30 |   --solutions solution_name_1 solution_name_2...
 31 | 
 32 | # Resume interrupted runs
 33 | flashinfer-bench run --local /path/to/flashinfer-trace --resume
 34 | ```
 35 | 
 36 | 
 37 | ### Via Python API
 38 | 
 39 | ```python
 40 | from flashinfer_bench.bench import Benchmark, BenchmarkConfig
 41 | from flashinfer_bench.data import TraceSet
 42 | 
 43 | # Load trace dataset
 44 | trace_set = TraceSet.from_path("/path/to/flashinfer-trace")
 45 | 
 46 | # Configure benchmark
 47 | config = BenchmarkConfig(
 48 |     warmup_runs=10,
 49 |     iterations=100,
 50 |     num_trials=5,
 51 |     rtol=1e-3,
 52 |     atol=1e-3,
 53 | )
 54 | 
 55 | # Run benchmark
 56 | benchmark = Benchmark(trace_set, config)
 57 | benchmark.run_all(save_results=True)
 58 | 
 59 | # Get best solution for a definition, e.g. gemm_n5120_k2048
 60 | best_trace = trace_set.get_best_trace("gemm_n5120_k2048")
 61 | if best_trace:
 62 |     print(f"Best solution: {best_trace.solution}")
 63 |     print(f"Speedup: {best_trace.evaluation.performance.speedup_factor:.2f}×")
 64 | ```
 65 | 
 66 | The rest of this guide shows how to use FlashInfer-Bench to automatically trace and optimize FlashInfer operations with custom kernels.
 67 | 
 68 | ## Tracing and Apply Overview
 69 | 
 70 | FlashInfer-Bench provides two key capabilities:
 71 | 1. **Tracing**: Automatically capture workloads from your FlashInfer calls
 72 | 2. **Apply**: Automatically substitute optimized custom kernels for FlashInfer operations
 73 | 
 74 | With adapters already written for FlashInfer, you can enable these features with minimal code changes.
 75 | 
 76 | ## Basic Usage with Apply
 77 | 
 78 | The simplest way to use FlashInfer-Bench is through environment variables. Once you've installed FlashInfer-Bench, you can enable tracing and apply by:
 79 | 
 80 | 1. **Import `flashinfer_bench`** before importing FlashInfer
 81 | 2. **Set environment variables** to control behavior
 82 | 
 83 | ### Example: Drop-in Optimization
 84 | 
 85 | ```python
 86 | import flashinfer_bench  # Import to install adapters
 87 | from flashinfer.norm import fused_add_rmsnorm
 88 | 
 89 | # Your FlashInfer code runs as normal
 90 | # But optimized kernels are automatically applied when available
 91 | ```
 92 | 
 93 | ### Environment Variables
 94 | 
 95 | Control FlashInfer-Bench behavior with these environment variables:
 96 | 
 97 | - **`FIB_ENABLE_TRACING=1`**: Enable workload tracing to collect performance data
 98 | - **`FIB_ENABLE_APPLY=1`**: Enable automatic kernel substitution
 99 | - **`FIB_DATASET_PATH=/path/to/dataset`**: Specify where trace data and custom kernels are stored (default: `~/.cache/flashinfer_bench/dataset`)
100 | 


--------------------------------------------------------------------------------
/tests/compile/test_python_builder.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | from pathlib import Path
  3 | 
  4 | import pytest
  5 | import torch
  6 | 
  7 | from flashinfer_bench.compile.builders import PythonBuilder
  8 | from flashinfer_bench.data import (
  9 |     AxisConst,
 10 |     BuildSpec,
 11 |     Definition,
 12 |     Solution,
 13 |     SourceFile,
 14 |     SupportedLanguages,
 15 |     TensorSpec,
 16 | )
 17 | 
 18 | 
 19 | @pytest.fixture(autouse=True)
 20 | def _use_tmp_cache_dir(tmp_cache_dir: Path) -> None:
 21 |     """Automatically use tmp_cache_dir for all tests in this module."""
 22 | 
 23 | 
 24 | def test_python_builder_minimum():
 25 |     definition = Definition(
 26 |         name="mm",
 27 |         op_type="op",
 28 |         axes={"M": AxisConst(value=2), "N": AxisConst(value=2)},
 29 |         inputs={
 30 |             "A": TensorSpec(shape=["M", "N"], dtype="float32"),
 31 |             "B": TensorSpec(shape=["M", "N"], dtype="float32"),
 32 |         },
 33 |         outputs={"C": TensorSpec(shape=["M", "N"], dtype="float32")},
 34 |         reference="import torch\n\ndef run(A, B):\n    return A",
 35 |     )
 36 |     solution = Solution(
 37 |         name="py_sol",
 38 |         definition="mm",
 39 |         author="me",
 40 |         spec=BuildSpec(
 41 |             language=SupportedLanguages.PYTHON,
 42 |             target_hardware=["cpu"],
 43 |             entry_point="pkg/main.py::run",
 44 |             destination_passing_style=False,
 45 |         ),
 46 |         sources=[SourceFile(path="pkg/main.py", content="def run(A, B):\n    return A")],
 47 |     )
 48 | 
 49 |     builder = PythonBuilder()
 50 |     runnable = builder.build(definition, solution)
 51 | 
 52 |     # Call runnable with torch tensors
 53 |     A = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
 54 |     B = torch.tensor([[0, 0], [0, 0]], dtype=torch.float32)
 55 |     out = runnable(A, B)
 56 |     assert torch.allclose(out, A)
 57 | 
 58 | 
 59 | def test_python_builder_add():
 60 |     definition = Definition(
 61 |         name="add",
 62 |         op_type="op",
 63 |         axes={"M": AxisConst(value=2), "N": AxisConst(value=2)},
 64 |         inputs={
 65 |             "X": TensorSpec(shape=["M", "N"], dtype="float32"),
 66 |             "Y": TensorSpec(shape=["M", "N"], dtype="float32"),
 67 |         },
 68 |         outputs={"Z": TensorSpec(shape=["M", "N"], dtype="float32")},
 69 |         reference="import torch\n\ndef run(X, Y):\n    return X + Y",
 70 |     )
 71 |     solution = Solution(
 72 |         name="add_py",
 73 |         definition="add",
 74 |         author="tester",
 75 |         spec=BuildSpec(
 76 |             language=SupportedLanguages.PYTHON,
 77 |             target_hardware=["cpu"],
 78 |             entry_point="main.py::run",
 79 |             destination_passing_style=False,
 80 |         ),
 81 |         sources=[
 82 |             SourceFile(
 83 |                 path="main.py",
 84 |                 content="""
 85 | import torch
 86 | def run(X: torch.Tensor, Y: torch.Tensor):
 87 |     return X + Y
 88 | """,
 89 |             )
 90 |         ],
 91 |     )
 92 | 
 93 |     builder = PythonBuilder()
 94 |     runnable = builder.build(definition, solution)
 95 |     X = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
 96 |     Y = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)
 97 |     out = runnable(X, Y)
 98 |     expected = torch.tensor([[6, 8], [10, 12]], dtype=torch.float32)
 99 |     assert torch.allclose(out, expected)
100 | 
101 | 
if __name__ == "__main__":
    # Forward pytest's exit status so a failing run is visible to the shell/CI
    # when this file is executed directly as a script.
    sys.exit(pytest.main(sys.argv))
104 | 


--------------------------------------------------------------------------------
/web/apps/web/app/leaderboard/section.tsx:
--------------------------------------------------------------------------------
  1 | import { LeaderboardClient } from "./client"
  2 | import {
  3 |   computeFastPCurvesForAuthors,
  4 |   computeAuthorCorrectnessSummary,
  5 |   type BaselineConfig,
  6 |   type CoverageStats,
  7 | } from "@/lib/analytics"
  8 | import type { Solution, Trace, Definition } from "@/lib/schemas"
  9 | import type { CurvePoint } from "@/lib/analytics"
 10 | 
// One leaderboard input: a definition together with the solutions and traces supplied for it.
type LeaderboardEntry = {
  definition: Definition
  solutions: Solution[]
  traces: Trace[]
  // Forwarded unchanged as the dataset's `baseline` when computing fast-p curves.
  baseline?: BaselineConfig
  // Solution names treated as baselines; authors of solutions with these names are reported as excluded.
  baselineNames: string[]
}
 18 | 
// Props accepted by LeaderboardSection.
type LeaderboardSectionProps = {
  // Entries without any solutions or without any traces are left out of the computed statistics.
  entries: LeaderboardEntry[]
  // Passed through unchanged to LeaderboardClient.
  baselineLabel: string
  // Passed through unchanged to LeaderboardClient.
  initialPinnedP?: number
}
 24 | 
// Per-definition breakdown handed to LeaderboardClient. Apart from `definition` and
// `solutionNamesByAuthor`, the fields are taken as-is from computeFastPCurvesForAuthors
// run on that single entry.
// NOTE(review): the Record<string, ...> fields are presumably keyed by author name — confirm in @/lib/analytics.
type DefinitionAuthorDetail = {
  definition: Definition
  curves: Record<string, CurvePoint[]>
  comparisonCounts: Record<string, number>
  totalComparisons: number
  totalWorkloads: number
  coverage: Record<string, CoverageStats>
  // author -> names of that author's solutions for this definition; solutions without an author are skipped.
  solutionNamesByAuthor: Record<string, string[]>
}
 34 | 
 35 | export function LeaderboardSection({ entries, baselineLabel, initialPinnedP }: LeaderboardSectionProps) {
 36 |   const filteredEntries = entries.filter((entry) => entry.solutions.length > 0 && entry.traces.length > 0)
 37 | 
 38 |   const excludedAuthors = new Set<string>()
 39 |   for (const entry of entries) {
 40 |     const baselineNames = new Set(entry.baselineNames || [])
 41 |     if (baselineNames.size === 0) continue
 42 |     for (const solution of entry.solutions) {
 43 |       if (baselineNames.has(solution.name) && solution.author) {
 44 |         excludedAuthors.add(solution.author)
 45 |       }
 46 |     }
 47 |   }
 48 | 
 49 |   const fast = computeFastPCurvesForAuthors({
 50 |     datasets: filteredEntries.map((entry) => ({
 51 |       solutions: entry.solutions,
 52 |       traces: entry.traces,
 53 |       baseline: entry.baseline,
 54 |     })),
 55 |     sampleCount: 300,
 56 |   })
 57 | 
 58 |   const correctness = computeAuthorCorrectnessSummary({
 59 |     datasets: filteredEntries.map((entry) => ({
 60 |       solutions: entry.solutions,
 61 |       traces: entry.traces,
 62 |     })),
 63 |   })
 64 | 
 65 |   const definitionAuthorDetails: DefinitionAuthorDetail[] = filteredEntries.map((entry) => {
 66 |     const { curves, comparisonCounts, totalComparisons, totalWorkloads, coverage } = computeFastPCurvesForAuthors({
 67 |       datasets: [
 68 |         {
 69 |           solutions: entry.solutions,
 70 |           traces: entry.traces,
 71 |           baseline: entry.baseline,
 72 |         },
 73 |       ],
 74 |       sampleCount: 300,
 75 |     })
 76 | 
 77 |     const solutionNamesByAuthor = entry.solutions.reduce<Record<string, string[]>>((acc, solution) => {
 78 |       if (!solution.author) return acc
 79 |       const list = acc[solution.author] ?? []
 80 |       list.push(solution.name)
 81 |       acc[solution.author] = list
 82 |       return acc
 83 |     }, {})
 84 | 
 85 |     return {
 86 |       definition: entry.definition,
 87 |       curves,
 88 |       comparisonCounts,
 89 |       totalComparisons,
 90 |       totalWorkloads,
 91 |       coverage,
 92 |       solutionNamesByAuthor,
 93 |     }
 94 |   })
 95 | 
 96 |   return (
 97 |     <LeaderboardClient
 98 |       fast={fast}
 99 |       correctness={correctness}
100 |       excludedAuthors={[...excludedAuthors]}
101 |       baselineLabel={baselineLabel}
102 |       initialPinnedP={initialPinnedP}
103 |       definitionAuthorDetails={definitionAuthorDetails}
104 |     />
105 |   )
106 | }
107 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | requires = ["setuptools>=61.0", "setuptools_scm", "wheel"]
  3 | build-backend = "setuptools.build_meta"
  4 | 
  5 | [project]
  6 | name = "flashinfer-bench"
  7 | dynamic = ["version"]
  8 | description = "FlashInfer-Bench - AI for AI Infrastructure for Accelerating AI Deployment"
  9 | readme = "README.md"
 10 | requires-python = ">=3.10,<4.0"
 11 | license = { text = "Apache License 2.0" }
 12 | authors = [{ name = "FlashInfer team" }]
 13 | keywords = ["deep-learning", "inference", "benchmark", "flashinfer", "gpu"]
 14 | classifiers = [
 15 |   "Development Status :: 4 - Beta",
 16 |   "Intended Audience :: Developers",
 17 |   "Intended Audience :: Science/Research",
 18 |   "License :: OSI Approved :: Apache Software License",
 19 |   "Programming Language :: Python :: 3",
 20 |   "Programming Language :: Python :: 3.10",
 21 |   "Programming Language :: Python :: 3.11",
 22 |   "Programming Language :: Python :: 3.12",
 23 |   "Programming Language :: Python :: 3.13",
 24 |   "Topic :: Scientific/Engineering :: Artificial Intelligence",
 25 | ]
 26 | dependencies = [
 27 |   "torch>=2.8.0",
 28 |   "pydantic>=2.0.0",
 29 |   "safetensors>=0.5.0",
 30 |   "flashinfer-python>=0.3.1",
 31 |   "apache-tvm-ffi>=0.1.2",
 32 | ]
 33 | 
 34 | [project.urls]
 35 | Homepage = "https://bench.flashinfer.ai"
 36 | Documentation = "https://bench.flashinfer.ai/docs"
 37 | Repository = "https://github.com/flashinfer-ai/flashinfer-bench"
 38 | Issues = "https://github.com/flashinfer-ai/flashinfer-bench/issues"
 39 | 
 40 | [project.optional-dependencies]
 41 | dev = [
 42 |   "pytest>=7.0.0",
 43 |   "pytest-cov>=4.0.0",
 44 |   "black>=22.0.0",
 45 |   "isort>=5.0.0",
 46 |   "mypy>=1.0.0",
 47 |   "ruff>=0.1.0",
 48 |   "pre-commit>=3.0.0",
 49 |   # setuptools is required by torch cpp extension, but torch does not declare it
 50 |   # as a dependency on python <= 3.11
 51 |   "setuptools",
 52 | ]
 53 | cuda12 = ["nvidia-cublas-cu12>=12.3", "nvidia-cudnn-cu12>=9.0"]
 54 | 
 55 | [project.scripts]
 56 | flashinfer-bench = "flashinfer_bench.cli.main:cli"
 57 | 
 58 | [tool.setuptools.packages.find]
 59 | where = ["."]
 60 | include = ["flashinfer_bench*"]
 61 | 
 62 | [tool.setuptools.package-data]
 63 | "flashinfer_bench" = ["py.typed"]
 64 | "flashinfer_bench._deps.cutlass" = ["include/**"]
 65 | 
 66 | [tool.black]
 67 | line-length = 100
 68 | target-version = ["py310", "py311", "py312", "py313"]
 69 | skip-magic-trailing-comma = true
 70 | 
 71 | [tool.isort]
 72 | profile = "black"
 73 | line_length = 100
 74 | 
 75 | [tool.ruff]
 76 | include = ["flashinfer_bench/**/*.py", "tests/**/*.py"]
 77 | line-length = 100
 78 | target-version = "py310"
 79 | 
 80 | [tool.ruff.lint]
 81 | # Never enforce `E501` (line length violations).
 82 | ignore = ["C901", "E501", "E741", "F402", "F823", "E731"]
 83 | # TID252: Prefer absolute imports over relative imports from parent modules
 84 | select = ["C", "E", "F", "W", "TID252"]
 85 | 
 86 | [tool.ruff.lint.per-file-ignores]
 87 | "__init__.py" = ["F401"]
 88 | "tests/*" = ["E741"]
 89 | "flashinfer_bench/compile/builders/triton_builder.py" = ["F401"]
 90 | 
 91 | [tool.mypy]
 92 | python_version = "3.10"
 93 | ignore_missing_imports = true
 94 | strict = true
 95 | 
 96 | [tool.pytest.ini_options]
 97 | testpaths = ["tests"]
 98 | python_files = ["test_*.py"]
 99 | addopts = "-rA --durations=0 --ignore=3rdparty"
100 | markers = ["requires_torch_cuda: tests that require torch and CUDA available"]
101 | 
102 | [tool.setuptools_scm]
103 | version_scheme = "python-simplified-semver"
104 | fallback_version = "0.0.0.dev0"
105 | 


--------------------------------------------------------------------------------
/flashinfer_bench/tracing/builtin/configs.py:
--------------------------------------------------------------------------------
 1 | """Built-in tracing configurations and presets.
 2 | 
 3 | This module provides pre-configured TracingConfig instances for common use cases
 4 | such as GEMM, attention kernels, etc.
 5 | """
 6 | 
 7 | from flashinfer_bench.tracing.config import TracingConfig
 8 | 
 9 | from .policies import AttentionFilterPolicy
10 | 
11 | # ============================================================================
12 | # TracingConfig Presets
13 | # ============================================================================
14 | 
# --- Individual presets -----------------------------------------------------
# Each preset pairs an input dump policy (a policy name, or an explicit list of
# input names) with a filter policy (a policy name, or a zero-argument factory).

gemm_config = TracingConfig(input_dump_policy="dump_none", filter_policy="keep_first_by_axes")

mla_paged_prefill_config = TracingConfig(
    input_dump_policy=["qo_indptr", "kv_indptr", "kv_indices", "sm_scale"],
    filter_policy=lambda: AttentionFilterPolicy(k=1),
)

mla_paged_decode_config = TracingConfig(
    input_dump_policy=["kv_indptr", "kv_indices", "sm_scale"],
    filter_policy=lambda: AttentionFilterPolicy(k=1),
)

gqa_paged_prefill_config = TracingConfig(
    input_dump_policy=["qo_indptr", "kv_indptr", "kv_indices", "sm_scale"],
    filter_policy=lambda: AttentionFilterPolicy(k=1),
)

gqa_ragged_prefill_config = TracingConfig(
    input_dump_policy=["qo_indptr", "kv_indptr", "sm_scale"],
    filter_policy=lambda: AttentionFilterPolicy(k=1),
)

gqa_paged_decode_config = TracingConfig(
    input_dump_policy=["kv_indptr", "kv_indices", "sm_scale"],
    filter_policy=lambda: AttentionFilterPolicy(k=1),
)

all_dump_config = TracingConfig(input_dump_policy="dump_all", filter_policy="keep_all")

axes_only_config = TracingConfig(input_dump_policy="dump_none", filter_policy="keep_first_by_axes")

# --- Definition-name -> preset tables ---------------------------------------

# Attention definitions, listed once and shared by both public tables below.
_ATTENTION_TRACING_CONFIGS = {
    "gqa_paged_decode_h32_kv4_d128_ps1": gqa_paged_decode_config,
    "gqa_paged_decode_h32_kv8_d128_ps1": gqa_paged_decode_config,
    "gqa_paged_prefill_causal_h32_kv4_d128_ps1": gqa_paged_prefill_config,
    "gqa_paged_prefill_causal_h32_kv8_d128_ps1": gqa_paged_prefill_config,
    "gqa_ragged_prefill_causal_h32_kv4_d128": gqa_ragged_prefill_config,
    "gqa_ragged_prefill_causal_h32_kv8_d128": gqa_ragged_prefill_config,
    "mla_paged_decode_h16_ckv512_kpe64_ps1": mla_paged_decode_config,
    "mla_paged_prefill_causal_h16_ckv512_kpe64_ps1": mla_paged_prefill_config,
}

# GEMM, attention and fused-add-RMSNorm definitions, in that insertion order.
FULL_TRACING_CONFIGS = {
    **{
        name: gemm_config
        for name in (
            "gemm_n128_k2048",
            "gemm_n256_k7168",
            "gemm_n2048_k4096",
            "gemm_n4096_k14336",
            "gemm_n4096_k4096",
            "gemm_n5120_k2048",
            "gemm_n6144_k4096",
            "gemm_n28672_k4096",
        )
    },
    **_ATTENTION_TRACING_CONFIGS,
    **{
        name: axes_only_config
        for name in (
            "fused_add_rmsnorm_h2048",
            "fused_add_rmsnorm_h4096",
            "fused_add_rmsnorm_h7168",
        )
    },
}

# Attention definitions only; a separate dict object with the same entries.
ATTN_ONLY_TRACING_CONFIGS = dict(_ATTENTION_TRACING_CONFIGS)
79 | 


--------------------------------------------------------------------------------
/web/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/nextjs,node
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=nextjs,node
  3 | 
  4 | # env
  5 | .env
  6 | .env.local
  7 | 
  8 | # Claude Code
  9 | .claude/
 10 | 
 11 | ### NextJS ###
 12 | # dependencies
 13 | /node_modules
 14 | /.pnp
 15 | .pnp.js
 16 | 
 17 | # testing
 18 | /coverage
 19 | 
 20 | # next.js
 21 | /.next/
 22 | /out/
 23 | 
 24 | # production
 25 | /build
 26 | 
 27 | # misc
 28 | .DS_Store
 29 | *.pem
 30 | 
 31 | # debug
 32 | npm-debug.log*
 33 | yarn-debug.log*
 34 | yarn-error.log*
 35 | .pnpm-debug.log*
 36 | 
 37 | # local env files
 38 | .env*.local
 39 | 
 40 | # vercel
 41 | .vercel
 42 | 
 43 | # typescript
 44 | *.tsbuildinfo
 45 | next-env.d.ts
 46 | 
 47 | ### Node ###
 48 | # Logs
 49 | logs
 50 | *.log
 51 | lerna-debug.log*
 52 | 
 53 | # Diagnostic reports (https://nodejs.org/api/report.html)
 54 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 55 | 
 56 | # Runtime data
 57 | pids
 58 | *.pid
 59 | *.seed
 60 | *.pid.lock
 61 | 
 62 | # Directory for instrumented libs generated by jscoverage/JSCover
 63 | lib-cov
 64 | 
 65 | # Coverage directory used by tools like istanbul
 66 | coverage
 67 | *.lcov
 68 | 
 69 | # nyc test coverage
 70 | .nyc_output
 71 | 
 72 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 73 | .grunt
 74 | 
 75 | # Bower dependency directory (https://bower.io/)
 76 | bower_components
 77 | 
 78 | # node-waf configuration
 79 | .lock-wscript
 80 | 
 81 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 82 | build/Release
 83 | 
 84 | # Dependency directories
 85 | node_modules/
 86 | jspm_packages/
 87 | 
 88 | # Snowpack dependency directory (https://snowpack.dev/)
 89 | web_modules/
 90 | 
 91 | # TypeScript cache
 92 | 
 93 | # Optional npm cache directory
 94 | .npm
 95 | 
 96 | # Optional eslint cache
 97 | .eslintcache
 98 | 
 99 | # Optional stylelint cache
100 | .stylelintcache
101 | 
102 | # Microbundle cache
103 | .rpt2_cache/
104 | .rts2_cache_cjs/
105 | .rts2_cache_es/
106 | .rts2_cache_umd/
107 | 
108 | # Optional REPL history
109 | .node_repl_history
110 | 
111 | # Output of 'npm pack'
112 | *.tgz
113 | 
114 | # Yarn Integrity file
115 | .yarn-integrity
116 | 
117 | # dotenv environment variable files
118 | .env
119 | .env.development.local
120 | .env.test.local
121 | .env.production.local
122 | .env.local
123 | 
124 | # parcel-bundler cache (https://parceljs.org/)
125 | .cache
126 | .parcel-cache
127 | 
128 | # Next.js build output
129 | .next
130 | out
131 | 
132 | # Nuxt.js build / generate output
133 | .nuxt
134 | dist
135 | 
136 | # Gatsby files
137 | .cache/
138 | # Comment in the public line in if your project uses Gatsby and not Next.js
139 | # https://nextjs.org/blog/next-9-1#public-directory-support
140 | # public
141 | 
142 | # vuepress build output
143 | .vuepress/dist
144 | 
145 | # vuepress v2.x temp and cache directory
146 | .temp
147 | 
148 | # Docusaurus cache and generated files
149 | .docusaurus
150 | 
151 | # Serverless directories
152 | .serverless/
153 | 
154 | # FuseBox cache
155 | .fusebox/
156 | 
157 | # Turbo
158 | .turbo/
159 | 
160 | # DynamoDB Local files
161 | .dynamodb/
162 | 
163 | # TernJS port file
164 | .tern-port
165 | 
166 | # Stores VSCode versions used for testing VSCode extensions
167 | .vscode-test
168 | 
169 | # yarn v2
170 | .yarn/cache
171 | .yarn/unplugged
172 | .yarn/build-state.yml
173 | .yarn/install-state.gz
174 | .pnp.*
175 | 
176 | ### Node Patch ###
177 | # Serverless Webpack directories
178 | .webpack/
179 | 
180 | # Optional stylelint cache
181 | 
182 | # SvelteKit build / generate output
183 | .svelte-kit
184 | 
185 | # End of https://www.toptal.com/developers/gitignore/api/nextjs,node
186 | 


--------------------------------------------------------------------------------
/flashinfer_bench/integration/flashinfer/common.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from typing import Any, Optional
  3 | 
  4 | import torch
  5 | 
  6 | 
  7 | def infer_kv_layout_from_args(inst) -> Optional[str]:
  8 |     layout = getattr(inst, "kv_layout", None)
  9 |     if isinstance(layout, str) and layout.upper() in ("NHD", "HND"):
 10 |         return layout.upper()
 11 |     return None
 12 | 
 13 | 
 14 | def infer_paged_kv_layout_from_tensors(paged_kv_cache, num_kv_heads: int) -> Optional[str]:
 15 |     # tuple: (k, v) 4D
 16 |     if isinstance(paged_kv_cache, tuple):
 17 |         k0 = paged_kv_cache[0]
 18 |         ndim = getattr(k0, "ndim", None)
 19 |         if ndim == 4:
 20 |             # NHD: [P, S, H, D]  => dim2 == H
 21 |             if k0.shape[2] == num_kv_heads:
 22 |                 return "NHD"
 23 |             # HND: [P, H, S, D]  => dim1 == H
 24 |             if k0.shape[1] == num_kv_heads:
 25 |                 return "HND"
 26 |         return None
 27 | 
 28 |     # single 5D: [P, 2, S/H, H/S, D]
 29 |     x = paged_kv_cache
 30 |     ndim = getattr(x, "ndim", None)
 31 |     if ndim == 5:
 32 |         # NHD: x[:, 0] -> [P, S, H, D] => dim3 == H
 33 |         if x.shape[3] == num_kv_heads:
 34 |             return "NHD"
 35 |         # HND: x[:, 0] -> [P, H, S, D] => dim2 == H
 36 |         if x.shape[2] == num_kv_heads:
 37 |             return "HND"
 38 |     return None
 39 | 
 40 | 
 41 | def infer_ragged_kv_layout_from_tensors(ragged_k_or_v, num_kv_heads: int) -> Optional[str]:
 42 |     if ragged_k_or_v.dim() != 3:
 43 |         return None
 44 |     if ragged_k_or_v.shape[1] == num_kv_heads:
 45 |         return "NHD"
 46 |     elif ragged_k_or_v.shape[0] == num_kv_heads:
 47 |         return "HND"
 48 |     return None
 49 | 
 50 | 
 51 | def normalize_paged_kv_to_nhd(paged_kv_cache, kv_layout: str):
 52 |     if isinstance(paged_kv_cache, tuple):
 53 |         k, v = paged_kv_cache
 54 |         if kv_layout == "NHD":
 55 |             return k, v
 56 |         else:  # HND: [P, H, S, D]
 57 |             return k.permute(0, 2, 1, 3), v.permute(0, 2, 1, 3)
 58 | 
 59 |     x: torch.Tensor = paged_kv_cache
 60 |     assert x.dim() == 5, "paged_kv_cache must be 5D when passed as a single tensor"
 61 |     if kv_layout == "NHD":
 62 |         k = x[:, 0]
 63 |         v = x[:, 1]
 64 |         return k, v
 65 |     else:
 66 |         k = x[:, 0].permute(0, 2, 1, 3)
 67 |         v = x[:, 1].permute(0, 2, 1, 3)
 68 |         return k, v
 69 | 
 70 | 
 71 | def normalize_ragged_kv_to_nhd(ragged_k_or_v, kv_layout: str):
 72 |     if kv_layout == "NHD":
 73 |         return ragged_k_or_v
 74 |     else:
 75 |         return ragged_k_or_v.permute(1, 0, 2)
 76 | 
 77 | 
 78 | def pick_sm_scale_gqa(head_dim: int, maybe: Any) -> float:
 79 |     if maybe is None:
 80 |         return 1.0 / math.sqrt(float(head_dim))
 81 |     if isinstance(maybe, torch.Tensor):
 82 |         return float(maybe.item())
 83 |     return float(maybe)
 84 | 
 85 | 
 86 | def pick_sm_scale_mla(head_dim_qk_nope: int, head_dim_qk_pe: int, maybe: Any) -> float:
 87 |     if maybe is None:
 88 |         return 1.0 / math.sqrt(float(head_dim_qk_nope + head_dim_qk_pe))
 89 |     if isinstance(maybe, torch.Tensor):
 90 |         return float(maybe.item())
 91 |     return float(maybe)
 92 | 
 93 | 
 94 | # TODO(shanli): make kernels to take pre-allocated buffers and write in-place
 95 | def write_back_outputs(
 96 |     *, output: torch.Tensor, lse: torch.Tensor, want_lse: bool, out_buf=None, lse_buf=None
 97 | ):
 98 |     if out_buf is not None:
 99 |         out_buf.copy_(output)
100 |         output = out_buf
101 |     if want_lse:
102 |         if lse_buf is not None:
103 |             lse_buf.copy_(lse)
104 |             lse = lse_buf
105 |         return output, lse
106 |     return output
107 | 


--------------------------------------------------------------------------------
/web/apps/web/components/model-card.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import React from "react"
 4 | import Link from "next/link"
 5 | import { usePathname, useRouter } from "next/navigation"
 6 | import { Copy, Check, Filter } from "lucide-react"
 7 | import { Card, CardContent, CardDescription, CardHeader, CardTitle, ProgressCircle, Button } from "@flashinfer-bench/ui"
 8 | import { Model } from "@/lib/schemas"
 9 | 
10 | interface ModelCardProps {
11 |   model: Model
12 |   href: string
13 | }
14 | 
/**
 * Clickable card summarising a model: name, description and how many of its
 * layer modules ("kernels") have at least one definition attached.
 *
 * The whole card links to `href`. Two inline actions stop the click from
 * reaching the link:
 *  - copy: writes `model.id` to the clipboard and shows a check mark for 2s;
 *  - filter: searches kernels by `model:<id>` — via a `kernelSearch` window
 *    event when already on "/", otherwise by navigating to "/?kernel_search=…".
 */
export function ModelCard({ model, href }: ModelCardProps) {
  const router = useRouter()
  const pathname = usePathname()
  const [copied, setCopied] = React.useState(false)
  // Pending "reset copied state" timer, so it can be cancelled on re-click/unmount.
  const resetTimer = React.useRef<ReturnType<typeof setTimeout> | null>(null)

  React.useEffect(() => {
    // Avoid a state update after the card has been unmounted.
    return () => {
      if (resetTimer.current !== null) clearTimeout(resetTimer.current)
    }
  }, [])

  // Count only layers (kernels)
  const layerModules = Object.values(model.modules).filter(m => m.type === "layer")
  const totalKernels = layerModules.length
  const tracedKernels = layerModules.filter(m => (m.definitions?.length ?? 0) > 0).length

  const handleCopy = async (e: React.MouseEvent) => {
    e.preventDefault()
    e.stopPropagation()
    try {
      // Rejects (or throws when `navigator.clipboard` is unavailable) in
      // insecure contexts or when permission is denied.
      await navigator.clipboard.writeText(model.id)
    } catch (error) {
      console.error("Failed to copy model ID", error)
      return
    }
    // Restart the 2s window if the user copies again before it elapsed.
    if (resetTimer.current !== null) clearTimeout(resetTimer.current)
    setCopied(true)
    resetTimer.current = setTimeout(() => {
      setCopied(false)
      resetTimer.current = null
    }, 2000)
  }

  const handleFilter = (e: React.MouseEvent) => {
    e.preventDefault()
    e.stopPropagation()
    const query = `model:${model.id}`
    if (typeof window !== "undefined" && pathname === "/") {
      // Already on the home page: notify the listener instead of navigating.
      window.dispatchEvent(new CustomEvent("kernelSearch", { detail: query }))
      return
    }
    router.push(`/?kernel_search=${encodeURIComponent(query)}`)
  }

  return (
    <Link href={href}>
      <Card className="hover:shadow-lg hover:border-primary transition-all cursor-pointer h-full">
        <CardHeader>
          <div className="flex items-center justify-between">
            <div className="flex items-center flex-1 min-w-0">
              <div className="group flex items-center gap-2 min-w-0">
                <CardTitle className="text-lg truncate">{model.name}</CardTitle>
                <button
                  type="button"
                  onClick={handleCopy}
                  className="opacity-0 group-hover:opacity-100 focus-visible:opacity-100 transition-opacity flex-shrink-0"
                  aria-label="Copy model ID"
                >
                  {copied ? (
                    <Check className="h-3 w-3 text-green-600" />
                  ) : (
                    <Copy className="h-3 w-3 text-muted-foreground hover:text-foreground" />
                  )}
                </button>
              </div>
            </div>
            <Button
              variant="ghost"
              size="icon"
              className="h-7 w-7 flex-shrink-0"
              onClick={handleFilter}
              aria-label="Filter kernels by this model"
            >
              <Filter className="h-3.5 w-3.5" />
            </Button>
          </div>
          <CardDescription>{model.description}</CardDescription>
        </CardHeader>
        <CardContent>
          <div className="flex items-center justify-between">
            <div className="text-sm text-muted-foreground">
              <span>{totalKernels} kernels</span>
            </div>
            <div className="flex items-center gap-2">
              <span className="text-sm text-muted-foreground">
                {tracedKernels}/{totalKernels} traced
              </span>
              <ProgressCircle
                value={tracedKernels}
                max={totalKernels}
                size={24}
                strokeWidth={2.5}
              />
            </div>
          </div>
        </CardContent>
      </Card>
    </Link>
  )
}
98 | 


--------------------------------------------------------------------------------
/tests/compile/test_utils.py:
--------------------------------------------------------------------------------
  1 | """Tests for compile/utils.py."""
  2 | 
  3 | import sys
  4 | 
  5 | import pytest
  6 | 
  7 | from flashinfer_bench.compile.utils import create_package_name, write_sources_to_path
  8 | from flashinfer_bench.data import BuildSpec, Solution, SourceFile, SupportedLanguages
  9 | 
 10 | 
 11 | def test_write_sources_to_path(tmp_path):
 12 |     """Test that write_sources_to_path creates files correctly."""
 13 |     sources = [
 14 |         SourceFile(path="main.py", content="print('hello')"),
 15 |         SourceFile(path="pkg/helper.py", content="def helper(): pass"),
 16 |     ]
 17 | 
 18 |     paths = write_sources_to_path(tmp_path, sources)
 19 | 
 20 |     assert len(paths) == 2
 21 |     assert (tmp_path / "main.py").exists()
 22 |     assert (tmp_path / "main.py").read_text() == "print('hello')"
 23 |     assert (tmp_path / "pkg" / "helper.py").exists()
 24 |     assert (tmp_path / "pkg" / "helper.py").read_text() == "def helper(): pass"
 25 | 
 26 | 
 27 | def test_create_package_name():
 28 |     """Test package name creation."""
 29 |     solution = Solution(
 30 |         name="my_solution",
 31 |         definition="test_def",
 32 |         author="test",
 33 |         spec=BuildSpec(
 34 |             language=SupportedLanguages.PYTHON, target_hardware=["cpu"], entry_point="main.py::run"
 35 |         ),
 36 |         sources=[SourceFile(path="main.py", content="def run(): pass")],
 37 |     )
 38 | 
 39 |     package_name = create_package_name(solution, "fib_python_")
 40 | 
 41 |     # Should start with prefix
 42 |     assert package_name.startswith("fib_python_")
 43 |     # Should contain normalized solution name
 44 |     assert "my_solution" in package_name
 45 |     # Should end with hash
 46 |     assert len(package_name.split("_")[-1]) == 6  # 6-char hash
 47 | 
 48 | 
 49 | def test_create_package_name_normalization():
 50 |     """Test that special characters are normalized to underscores."""
 51 |     spec = BuildSpec(
 52 |         language=SupportedLanguages.PYTHON, target_hardware=["cpu"], entry_point="main.py::run"
 53 |     )
 54 |     sources = [SourceFile(path="main.py", content="def run(): pass")]
 55 | 
 56 |     solution = Solution(
 57 |         name="my-solution.v1@test", definition="test_def", author="test", spec=spec, sources=sources
 58 |     )
 59 |     package_name = create_package_name(solution, "")
 60 |     assert package_name.startswith("my_solution_v1_test")
 61 | 
 62 |     # Special characters should be replaced with underscores
 63 |     assert "-" not in package_name
 64 |     assert "." not in package_name
 65 |     assert "@" not in package_name
 66 | 
 67 |     solution2 = Solution(
 68 |         name="123solution", definition="test_def", author="test", spec=spec, sources=sources
 69 |     )
 70 |     package_name2 = create_package_name(solution2, "")
 71 |     assert package_name2.startswith("_123solution")
 72 | 
 73 | 
 74 | def test_create_package_name_deterministic():
 75 |     """Test that the same solution produces the same package name."""
 76 |     spec = BuildSpec(
 77 |         language=SupportedLanguages.PYTHON, target_hardware=["cpu"], entry_point="main.py::run"
 78 |     )
 79 |     solution1 = Solution(
 80 |         name="my_solution",
 81 |         definition="test_def",
 82 |         author="test",
 83 |         spec=spec,
 84 |         sources=[SourceFile(path="main.py", content="def run(): return 1")],
 85 |     )
 86 | 
 87 |     name1 = create_package_name(solution1, "prefix_")
 88 |     name2 = create_package_name(solution1, "prefix_")
 89 | 
 90 |     assert name1 == name2
 91 | 
 92 |     solution2 = Solution(
 93 |         name="my_solution",
 94 |         definition="test_def",
 95 |         author="test",
 96 |         spec=spec,
 97 |         sources=[SourceFile(path="main.py", content="def run(): return 2")],
 98 |     )
 99 |     name3 = create_package_name(solution2, "prefix_")
100 | 
101 |     assert name1 != name3
102 | 
103 | 
104 | if __name__ == "__main__":
105 |     pytest.main(sys.argv)
106 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to FlashInfer-bench
 2 | 
 3 | We welcome contributions of all kinds, including new features, bug fixes, documentation improvements, and more. To ensure a smooth process, here is a general guide to contributing.
 4 | 
 5 | For significant changes, such as adding a major new feature or refactoring core code, it's often a good idea to open a GitHub issue first to discuss your proposal. This step is optional, but can be very helpful as it allows the maintainers and the community to provide feedback and helps ensure your work aligns with the project's goals.
 6 | 
 7 | The general workflow for submitting a change is:
 8 | 
 9 | 1. **Fork the repository** and create a new branch for your work.
10 | 2. Make your changes, including adding tests if applicable. Please refer to the [`README.md`](README.md) for project-specific setup and testing instructions.
11 | 3. Push your changes to your fork and **open a pull request** to the main repository. Please provide a clear description of your changes and link to the relevant issue if one exists.
12 | 4. **Iterate on the pull request** by responding to feedback from reviewers until the change is ready to be merged.
13 | 
14 | ## **Pull Request Naming Convention**
15 | 
16 | To maintain consistency and clarity, please follow this naming convention for your pull requests:
17 | 
18 | ```
19 | <type>: <brief description>
20 | ```
21 | 
22 | **Available types:**
23 | 
24 | * `feat`: A new feature or enhancement
25 | * `fix`: A bug fix
26 | * `perf`: Performance improvement
27 | * `refactor`: Code refactoring without changing functionality
28 | * `test`: Adding or updating tests
29 | * `docs`: Documentation changes
30 | * `style`: Code style changes (formatting, whitespace, etc.)
31 | * `build`: Changes to build system or dependencies
32 | * `ci`: Changes to CI/CD configuration
33 | * `chore`: Maintenance tasks and other changes
34 | 
35 | **Examples:**
36 | 
37 | * `feat: add FP8 kernel benchmark`
38 | * `fix: correct memory allocation in attention kernel`
39 | * `perf: optimize fused MoE throughput`
40 | * `docs: update installation guide`
41 | * `test: add unit tests for decode kernel`
42 | 
43 | ## **Review**
44 | 
45 | Once you've opened a pull request, the review process begins:
46 | 
47 | * **Community Review:** We encourage everyone to participate in the review process. All feedback on pull requests is welcome and valued.
48 | * **Approval:** For a pull request to be merged, it must receive at least **one approval** from designated code owners for the files you've changed, or from a project lead. The [`CODEOWNERS`](./CODEOWNERS) file in the root of the repository lists the members responsible for different parts of the codebase.
49 | 
50 | We hope this collaborative approach will maintain high code quality and ensure knowledge is shared effectively among contributors.
51 | 
52 | ## **Merge**
53 | 
54 | After your pull request has been approved and all automated checks (CI) have passed, a **Community Committer** will merge it into the main branch.
55 | 
56 | ### **Performance and Stability**
57 | 
 58 | High performance and stability are key goals for FlashInfer-bench. If a performance regression or functional issue occurs after a merge, we encourage anyone to report it. The process is as follows:
59 | 
60 | 1. **File an Issue:** Anyone can file a high-priority issue in the repository. It is helpful to tag the original pull request and notify the author.
61 | 2. **Collaboration:** A project committer or lead will collaborate with the community to address the issue.
62 | 3. **Resolution:** If a quick fix is not available, the change may be reverted to maintain project stability.
63 | 
64 | ## **Community Committer Role**
65 | 
66 | FlashInfer-bench is maintained by a group of **Community Committers**. These are core contributors who have earned the role by providing frequent and valuable contributions to the project. They are responsible for reviewing and merging pull requests, maintaining the project's standards, and guiding new contributors.
67 | 


--------------------------------------------------------------------------------