├── .gitignore ├── LICENSE ├── README.md ├── experiments ├── benchmark │ ├── infinitebench │ │ ├── code_debug.yaml │ │ ├── data │ │ │ └── InfiniteBench.py │ │ ├── download_dataset.sh │ │ ├── kv_retrieval.yaml │ │ ├── longbook_choice_eng.yaml │ │ ├── longbook_qa_chn.yaml │ │ ├── longbook_qa_eng.yaml │ │ ├── longbook_sum_eng.yaml │ │ ├── longdialogue_qa_eng.yaml │ │ ├── math_find.yaml │ │ ├── metrics.py │ │ ├── number_string.yaml │ │ ├── passkey.yaml │ │ ├── rouge.py │ │ └── utils.py │ ├── ruler │ │ ├── data │ │ │ ├── prepare.py │ │ │ ├── synthetic │ │ │ │ ├── common_words_extraction.py │ │ │ │ ├── constants.py │ │ │ │ ├── freq_words_extraction.py │ │ │ │ ├── json │ │ │ │ │ ├── PaulGrahamEssays_URLs.txt │ │ │ │ │ ├── download_paulgraham_essay.py │ │ │ │ │ └── download_qa_dataset.sh │ │ │ │ ├── niah.py │ │ │ │ ├── qa.py │ │ │ │ └── variable_tracking.py │ │ │ ├── template.py │ │ │ └── tokenizer.py │ │ ├── debug_run.sh │ │ ├── download_dataset.sh │ │ ├── eval │ │ │ ├── evaluate.py │ │ │ └── synthetic │ │ │ │ └── constants.py │ │ └── synthetic.yaml │ ├── run_infinitebench.py │ ├── run_ruler.py │ └── utils.py ├── download_data.sh ├── download_model.sh └── scripts │ ├── flex_prefill │ ├── infinitebench.sh │ └── ruler.sh │ ├── full │ ├── infinitebench.sh │ └── ruler.sh │ ├── minfer │ ├── infinitebench.sh │ └── ruler.sh │ └── streaming_llm │ ├── infinitebench.sh │ └── ruler.sh ├── extra_requirements.txt ├── flex_prefill ├── __init__.py ├── modules │ ├── __init__.py │ ├── glm │ │ ├── __init__.py │ │ ├── flash_attention.py │ │ ├── flex_prefill_attention.py │ │ ├── glm_mlp_forward.py │ │ ├── glm_self_attention_foward.py │ │ ├── minfer_attention.py │ │ ├── streaming_llm_attention.py │ │ └── vertical_slash_attention.py │ ├── llama │ │ ├── __init__.py │ │ ├── apply_rope.py │ │ ├── causal_model_forward.py │ │ ├── flash_attention.py │ │ ├── flex_prefill_attention.py │ │ ├── llama_mlp_forward.py │ │ ├── minfer_attention.py │ │ ├── streaming_llm_attention.py │ │ └── vertical_slash_attention.py │ ├── patch.py │ └── qwen2 │ │ ├── __init__.py │ │ ├── causal_model_forward.py │ │ ├── flash_attention.py │ │ ├── flex_prefill_attention.py │ │ ├── minfer_attention.py │ │ ├── qwen_mlp_forward.py │ │ ├── streaming_llm_attention.py │ │ └── vertical_slash_attention.py └── ops │ ├── __init__.py │ ├── flex_prefill_attention.py │ ├── minfer │ ├── __init__.py │ ├── config │ │ ├── GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json │ │ ├── Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json │ │ ├── Qwen2_7B_Instruct_128k_instruct_kv_out_v32_fit_o_best_pattern.json │ │ └── Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json │ └── minfer_attention.py │ ├── streaming_llm_attention.py │ └── vertical_slash_attention.py ├── install.sh ├── setup.py └── tests ├── example_data.json ├── kernel_benchmark.py └── test_llm.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/README.md -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/code_debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/code_debug.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/data/InfiniteBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/data/InfiniteBench.py -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/download_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/download_dataset.sh -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/kv_retrieval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/kv_retrieval.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/longbook_choice_eng.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/longbook_choice_eng.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/longbook_qa_chn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/longbook_qa_chn.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/longbook_qa_eng.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/longbook_qa_eng.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/longbook_sum_eng.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/longbook_sum_eng.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/longdialogue_qa_eng.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/longdialogue_qa_eng.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/math_find.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/math_find.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/metrics.py -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/number_string.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/number_string.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/passkey.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/passkey.yaml -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/rouge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/rouge.py -------------------------------------------------------------------------------- /experiments/benchmark/infinitebench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/infinitebench/utils.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/prepare.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/common_words_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/common_words_extraction.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/constants.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/freq_words_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/freq_words_extraction.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/json/PaulGrahamEssays_URLs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/json/PaulGrahamEssays_URLs.txt -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/json/download_paulgraham_essay.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/json/download_paulgraham_essay.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/json/download_qa_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/json/download_qa_dataset.sh -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/niah.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/niah.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/qa.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/synthetic/variable_tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/synthetic/variable_tracking.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/template.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/data/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/data/tokenizer.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/debug_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/debug_run.sh -------------------------------------------------------------------------------- /experiments/benchmark/ruler/download_dataset.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/download_dataset.sh -------------------------------------------------------------------------------- /experiments/benchmark/ruler/eval/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/eval/evaluate.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/eval/synthetic/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/eval/synthetic/constants.py -------------------------------------------------------------------------------- /experiments/benchmark/ruler/synthetic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/ruler/synthetic.yaml -------------------------------------------------------------------------------- /experiments/benchmark/run_infinitebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/run_infinitebench.py -------------------------------------------------------------------------------- /experiments/benchmark/run_ruler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/run_ruler.py -------------------------------------------------------------------------------- /experiments/benchmark/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/benchmark/utils.py -------------------------------------------------------------------------------- /experiments/download_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/download_data.sh -------------------------------------------------------------------------------- /experiments/download_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/download_model.sh -------------------------------------------------------------------------------- /experiments/scripts/flex_prefill/infinitebench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/flex_prefill/infinitebench.sh -------------------------------------------------------------------------------- /experiments/scripts/flex_prefill/ruler.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/flex_prefill/ruler.sh -------------------------------------------------------------------------------- /experiments/scripts/full/infinitebench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/full/infinitebench.sh -------------------------------------------------------------------------------- /experiments/scripts/full/ruler.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/full/ruler.sh -------------------------------------------------------------------------------- /experiments/scripts/minfer/infinitebench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/minfer/infinitebench.sh -------------------------------------------------------------------------------- /experiments/scripts/minfer/ruler.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/minfer/ruler.sh -------------------------------------------------------------------------------- /experiments/scripts/streaming_llm/infinitebench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/streaming_llm/infinitebench.sh -------------------------------------------------------------------------------- /experiments/scripts/streaming_llm/ruler.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/experiments/scripts/streaming_llm/ruler.sh -------------------------------------------------------------------------------- /extra_requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/extra_requirements.txt -------------------------------------------------------------------------------- /flex_prefill/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/__init__.py -------------------------------------------------------------------------------- /flex_prefill/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_prefill/modules/glm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_prefill/modules/glm/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/flash_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/glm/flex_prefill_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/flex_prefill_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/glm/glm_mlp_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/glm_mlp_forward.py -------------------------------------------------------------------------------- /flex_prefill/modules/glm/glm_self_attention_foward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/glm_self_attention_foward.py -------------------------------------------------------------------------------- /flex_prefill/modules/glm/minfer_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/minfer_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/glm/streaming_llm_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/streaming_llm_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/glm/vertical_slash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/glm/vertical_slash_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_prefill/modules/llama/apply_rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/apply_rope.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/causal_model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/causal_model_forward.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/flash_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/flex_prefill_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/flex_prefill_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/llama_mlp_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/llama_mlp_forward.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/minfer_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/minfer_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/streaming_llm_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/streaming_llm_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/llama/vertical_slash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/llama/vertical_slash_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/patch.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/causal_model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/causal_model_forward.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/flash_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/flex_prefill_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/flex_prefill_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/minfer_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/minfer_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/qwen_mlp_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/qwen_mlp_forward.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/streaming_llm_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/streaming_llm_attention.py -------------------------------------------------------------------------------- /flex_prefill/modules/qwen2/vertical_slash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/modules/qwen2/vertical_slash_attention.py -------------------------------------------------------------------------------- /flex_prefill/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_prefill/ops/flex_prefill_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/flex_prefill_attention.py -------------------------------------------------------------------------------- /flex_prefill/ops/minfer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /flex_prefill/ops/minfer/config/GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/minfer/config/GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /flex_prefill/ops/minfer/config/Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/minfer/config/Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /flex_prefill/ops/minfer/config/Qwen2_7B_Instruct_128k_instruct_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/minfer/config/Qwen2_7B_Instruct_128k_instruct_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /flex_prefill/ops/minfer/config/Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/minfer/config/Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /flex_prefill/ops/minfer/minfer_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/minfer/minfer_attention.py -------------------------------------------------------------------------------- /flex_prefill/ops/streaming_llm_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/streaming_llm_attention.py -------------------------------------------------------------------------------- /flex_prefill/ops/vertical_slash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/flex_prefill/ops/vertical_slash_attention.py -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/install.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/setup.py -------------------------------------------------------------------------------- /tests/example_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/tests/example_data.json -------------------------------------------------------------------------------- /tests/kernel_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/tests/kernel_benchmark.py -------------------------------------------------------------------------------- /tests/test_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ByteDance-Seed/FlexPrefill/HEAD/tests/test_llm.py --------------------------------------------------------------------------------