├── LICENSE ├── MInference ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── Transparency_FAQ.md ├── assets │ └── license_header.txt ├── csrc │ ├── kernels.cpp │ └── vertical_slash_index.cu ├── examples │ ├── run_hf.py │ ├── run_hf_streaming.py │ ├── run_hf_streaming.sh │ └── run_vllm.py ├── images │ ├── MInference1_onepage.png │ ├── MInference_logo.png │ ├── SCBench_onepage.png │ ├── benchmarks │ │ ├── needle_viz_LLaMA-3-8B-1M_ours_1K_1000K.png │ │ └── ppl-LLaMA-3-262k.png │ ├── scbench │ │ ├── comparison.png │ │ └── overview.png │ └── t5_sparse_pattern.png ├── minference │ ├── __init__.py │ ├── configs │ │ ├── GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json │ │ ├── Llama_3.1_70B_Instruct_128k_kv_out_v32_fit_o_best_pattern_v2.json │ │ ├── Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json │ │ ├── Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern_v2.json │ │ ├── Llama_3_70B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json │ │ ├── Llama_3_8B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json │ │ ├── Phi_3_mini_128k_instruct_kv_out_v32_fit_o_best_pattern.json │ │ ├── Qwen2.5_32B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json │ │ ├── Qwen2.5_72B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json │ │ ├── Qwen2_7B_Instruct_128k_instruct_kv_out_v32_fit_o_best_pattern.json │ │ ├── Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json │ │ ├── __init__.py │ │ └── model2path.py │ ├── minference_configuration.py │ ├── models_patch.py │ ├── modules │ │ ├── __init__.py │ │ ├── flexprefill.py │ │ ├── forward.py │ │ ├── inf_llm.py │ │ ├── kivi.py │ │ ├── kvcompression.py │ │ ├── minference_forward.py │ │ ├── pyramidkv.py │ │ ├── quest.py │ │ ├── retr_attn.py │ │ └── snapkv.py │ ├── ops │ │ ├── __init__.py │ │ ├── block_sparse_flash_attention.py │ │ ├── flash_attn_triton.py │ │ ├── pit_sparse_flash_attention.py │ │ ├── pit_sparse_flash_attention_v2.py │ │ └── streaming_kernel.py │ ├── patch.py │ ├── utils.py │ └── version.py ├── papers │ └── MInference1_Arxiv.pdf ├── scbench │ ├── args.py │ ├── cache_blend.yaml │ ├── compute_scores.py │ ├── eval_utils.py │ ├── readme.md │ ├── repo_qa_utils.py │ ├── requirements.txt │ ├── run_scbench.py │ ├── scripts │ │ ├── run_all_tasks.sh │ │ ├── run_single_method.sh │ │ ├── test_llama.sh │ │ └── test_minference_with_snapkv.sh │ └── setup │ │ └── setup_kivi.sh ├── setup.cfg ├── setup.py └── tests │ └── test_e2e.py ├── README.md ├── assets └── teaser.png ├── eval ├── README.md ├── arguments.py ├── configs │ ├── icl.yaml │ ├── longqa.yaml │ ├── rag.yaml │ ├── recall.yaml │ ├── rerank.yaml │ └── summ.yaml ├── data.py ├── eval.py ├── eval_alce.py ├── locret_cache.py ├── longproc_addon │ ├── README.md │ ├── configs │ │ ├── html_to_tsv.yaml │ │ └── travel_planning.yaml │ └── longproc_helmet_loader.py ├── model_utils.py ├── prompts │ ├── asqa_nocite.json │ ├── asqa_revised.json │ ├── qampari_nocite.json │ └── qampari_revised.json ├── requirements.txt ├── run_scripts │ ├── run_duo_32k.sh │ ├── run_fp_pruduo.sh │ ├── run_fp_pyramidkv_sweep.sh │ ├── run_l2_sweep.sh │ ├── run_localsize_sweep_pruduo.sh │ ├── run_locret_sweep.sh │ ├── run_prulong_32k.sh │ ├── run_pyramidkv_snapkv_128k_local64_sweep.sh │ ├── run_pyramidkv_snapkv_128k_nopatch_pool_8k_sweep.sh │ ├── run_pyramidkv_snapkv_128k_nopatch_pool_sweep.sh │ ├── run_pyramidkv_snapkv_128k_patch_pool_8k_sweep.sh │ ├── run_pyramidkv_snapkv_128k_patch_pool_sweep.sh │ ├── run_realmetrics_orig.sh │ ├── run_realmetrics_pruduo.sh │ └── run_realmetrics_pyramidkv_snapkv_128k.sh ├── scripts │ ├── eval_gpt4_longqa.py │ ├── eval_gpt4_summ.py │ └── gpt4_eval_utils.py ├── utils.py └── viz │ ├── gather_results.py │ ├── kv_footprint.py │ └── plot_kv_footprint.py └── prulong ├── README.md ├── concat_packed_datasets.py ├── requirements.txt ├── run_scripts ├── prulong_masksandweights.sh ├── prulong_masksonly.sh └── sft.sh ├── save_prulong_masks.py ├── scripts └── unpack_dataset.py └── training ├── dataset.py ├── distributed_attention.py ├── l0.py ├── lh_train_language_model.py ├── lh_trainer.py └── modeling_flash_llama.py /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/LICENSE -------------------------------------------------------------------------------- /MInference/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/.pre-commit-config.yaml -------------------------------------------------------------------------------- /MInference/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /MInference/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/LICENSE -------------------------------------------------------------------------------- /MInference/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/MANIFEST.in -------------------------------------------------------------------------------- /MInference/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/Makefile -------------------------------------------------------------------------------- /MInference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/README.md -------------------------------------------------------------------------------- /MInference/SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/SECURITY.md -------------------------------------------------------------------------------- /MInference/SUPPORT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/SUPPORT.md -------------------------------------------------------------------------------- /MInference/Transparency_FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/Transparency_FAQ.md -------------------------------------------------------------------------------- /MInference/assets/license_header.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/assets/license_header.txt -------------------------------------------------------------------------------- /MInference/csrc/kernels.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/csrc/kernels.cpp -------------------------------------------------------------------------------- /MInference/csrc/vertical_slash_index.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/csrc/vertical_slash_index.cu -------------------------------------------------------------------------------- /MInference/examples/run_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/examples/run_hf.py -------------------------------------------------------------------------------- /MInference/examples/run_hf_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/examples/run_hf_streaming.py -------------------------------------------------------------------------------- /MInference/examples/run_hf_streaming.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/examples/run_hf_streaming.sh -------------------------------------------------------------------------------- /MInference/examples/run_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/examples/run_vllm.py -------------------------------------------------------------------------------- /MInference/images/MInference1_onepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/MInference1_onepage.png -------------------------------------------------------------------------------- /MInference/images/MInference_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/MInference_logo.png -------------------------------------------------------------------------------- /MInference/images/SCBench_onepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/SCBench_onepage.png -------------------------------------------------------------------------------- /MInference/images/benchmarks/needle_viz_LLaMA-3-8B-1M_ours_1K_1000K.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/benchmarks/needle_viz_LLaMA-3-8B-1M_ours_1K_1000K.png -------------------------------------------------------------------------------- /MInference/images/benchmarks/ppl-LLaMA-3-262k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/benchmarks/ppl-LLaMA-3-262k.png -------------------------------------------------------------------------------- /MInference/images/scbench/comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/scbench/comparison.png -------------------------------------------------------------------------------- /MInference/images/scbench/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/scbench/overview.png -------------------------------------------------------------------------------- /MInference/images/t5_sparse_pattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/images/t5_sparse_pattern.png -------------------------------------------------------------------------------- /MInference/minference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/__init__.py -------------------------------------------------------------------------------- /MInference/minference/configs/GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/GLM_4_9B_1M_instruct_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Llama_3.1_70B_Instruct_128k_kv_out_v32_fit_o_best_pattern_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Llama_3.1_70B_Instruct_128k_kv_out_v32_fit_o_best_pattern_v2.json -------------------------------------------------------------------------------- /MInference/minference/configs/Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern_v2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Llama_3.1_8B_Instruct_128k_kv_out_v32_fit_o_best_pattern_v2.json -------------------------------------------------------------------------------- /MInference/minference/configs/Llama_3_70B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Llama_3_70B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Llama_3_8B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Llama_3_8B_Instruct_262k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Phi_3_mini_128k_instruct_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Phi_3_mini_128k_instruct_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Qwen2.5_32B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Qwen2.5_32B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Qwen2.5_72B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Qwen2.5_72B_Instruct_128k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Qwen2_7B_Instruct_128k_instruct_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Qwen2_7B_Instruct_128k_instruct_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/Yi_9B_200k_kv_out_v32_fit_o_best_pattern.json -------------------------------------------------------------------------------- /MInference/minference/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MInference/minference/configs/model2path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/configs/model2path.py -------------------------------------------------------------------------------- /MInference/minference/minference_configuration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/minference_configuration.py -------------------------------------------------------------------------------- /MInference/minference/models_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/models_patch.py -------------------------------------------------------------------------------- /MInference/minference/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MInference/minference/modules/flexprefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/flexprefill.py -------------------------------------------------------------------------------- /MInference/minference/modules/forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/forward.py -------------------------------------------------------------------------------- /MInference/minference/modules/inf_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/inf_llm.py -------------------------------------------------------------------------------- /MInference/minference/modules/kivi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/kivi.py -------------------------------------------------------------------------------- /MInference/minference/modules/kvcompression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/kvcompression.py -------------------------------------------------------------------------------- /MInference/minference/modules/minference_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/minference_forward.py -------------------------------------------------------------------------------- /MInference/minference/modules/pyramidkv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/pyramidkv.py -------------------------------------------------------------------------------- /MInference/minference/modules/quest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/quest.py -------------------------------------------------------------------------------- /MInference/minference/modules/retr_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/retr_attn.py -------------------------------------------------------------------------------- /MInference/minference/modules/snapkv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/modules/snapkv.py -------------------------------------------------------------------------------- /MInference/minference/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MInference/minference/ops/block_sparse_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/ops/block_sparse_flash_attention.py -------------------------------------------------------------------------------- /MInference/minference/ops/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/ops/flash_attn_triton.py -------------------------------------------------------------------------------- /MInference/minference/ops/pit_sparse_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/ops/pit_sparse_flash_attention.py -------------------------------------------------------------------------------- /MInference/minference/ops/pit_sparse_flash_attention_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/ops/pit_sparse_flash_attention_v2.py -------------------------------------------------------------------------------- /MInference/minference/ops/streaming_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/ops/streaming_kernel.py -------------------------------------------------------------------------------- /MInference/minference/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/patch.py -------------------------------------------------------------------------------- /MInference/minference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/utils.py -------------------------------------------------------------------------------- /MInference/minference/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/minference/version.py -------------------------------------------------------------------------------- /MInference/papers/MInference1_Arxiv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/papers/MInference1_Arxiv.pdf -------------------------------------------------------------------------------- /MInference/scbench/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/args.py -------------------------------------------------------------------------------- /MInference/scbench/cache_blend.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/cache_blend.yaml -------------------------------------------------------------------------------- /MInference/scbench/compute_scores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/compute_scores.py -------------------------------------------------------------------------------- /MInference/scbench/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/eval_utils.py -------------------------------------------------------------------------------- /MInference/scbench/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/readme.md -------------------------------------------------------------------------------- /MInference/scbench/repo_qa_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/repo_qa_utils.py -------------------------------------------------------------------------------- /MInference/scbench/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/requirements.txt -------------------------------------------------------------------------------- /MInference/scbench/run_scbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/run_scbench.py -------------------------------------------------------------------------------- /MInference/scbench/scripts/run_all_tasks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/scripts/run_all_tasks.sh -------------------------------------------------------------------------------- /MInference/scbench/scripts/run_single_method.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/scripts/run_single_method.sh -------------------------------------------------------------------------------- /MInference/scbench/scripts/test_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/scripts/test_llama.sh -------------------------------------------------------------------------------- /MInference/scbench/scripts/test_minference_with_snapkv.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/scripts/test_minference_with_snapkv.sh -------------------------------------------------------------------------------- /MInference/scbench/setup/setup_kivi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/scbench/setup/setup_kivi.sh -------------------------------------------------------------------------------- /MInference/setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/setup.cfg -------------------------------------------------------------------------------- /MInference/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/setup.py -------------------------------------------------------------------------------- /MInference/tests/test_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/MInference/tests/test_e2e.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/README.md -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/assets/teaser.png -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/README.md -------------------------------------------------------------------------------- /eval/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/arguments.py -------------------------------------------------------------------------------- /eval/configs/icl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/configs/icl.yaml -------------------------------------------------------------------------------- /eval/configs/longqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/configs/longqa.yaml -------------------------------------------------------------------------------- /eval/configs/rag.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/configs/rag.yaml -------------------------------------------------------------------------------- /eval/configs/recall.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/configs/recall.yaml -------------------------------------------------------------------------------- /eval/configs/rerank.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/configs/rerank.yaml -------------------------------------------------------------------------------- /eval/configs/summ.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/configs/summ.yaml -------------------------------------------------------------------------------- /eval/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/data.py -------------------------------------------------------------------------------- /eval/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/eval.py -------------------------------------------------------------------------------- /eval/eval_alce.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/eval_alce.py -------------------------------------------------------------------------------- /eval/locret_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/locret_cache.py -------------------------------------------------------------------------------- /eval/longproc_addon/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/longproc_addon/README.md -------------------------------------------------------------------------------- /eval/longproc_addon/configs/html_to_tsv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/longproc_addon/configs/html_to_tsv.yaml -------------------------------------------------------------------------------- /eval/longproc_addon/configs/travel_planning.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/longproc_addon/configs/travel_planning.yaml -------------------------------------------------------------------------------- /eval/longproc_addon/longproc_helmet_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/longproc_addon/longproc_helmet_loader.py -------------------------------------------------------------------------------- /eval/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/model_utils.py -------------------------------------------------------------------------------- /eval/prompts/asqa_nocite.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/prompts/asqa_nocite.json -------------------------------------------------------------------------------- /eval/prompts/asqa_revised.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/prompts/asqa_revised.json -------------------------------------------------------------------------------- /eval/prompts/qampari_nocite.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/prompts/qampari_nocite.json -------------------------------------------------------------------------------- /eval/prompts/qampari_revised.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/prompts/qampari_revised.json -------------------------------------------------------------------------------- /eval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/requirements.txt -------------------------------------------------------------------------------- /eval/run_scripts/run_duo_32k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_duo_32k.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_fp_pruduo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_fp_pruduo.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_fp_pyramidkv_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_fp_pyramidkv_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_l2_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_l2_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_localsize_sweep_pruduo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_localsize_sweep_pruduo.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_locret_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_locret_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_prulong_32k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_prulong_32k.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_pyramidkv_snapkv_128k_local64_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_pyramidkv_snapkv_128k_local64_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_pyramidkv_snapkv_128k_nopatch_pool_8k_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_pyramidkv_snapkv_128k_nopatch_pool_8k_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_pyramidkv_snapkv_128k_nopatch_pool_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_pyramidkv_snapkv_128k_nopatch_pool_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_pyramidkv_snapkv_128k_patch_pool_8k_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_pyramidkv_snapkv_128k_patch_pool_8k_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_pyramidkv_snapkv_128k_patch_pool_sweep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_pyramidkv_snapkv_128k_patch_pool_sweep.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_realmetrics_orig.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_realmetrics_orig.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_realmetrics_pruduo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_realmetrics_pruduo.sh -------------------------------------------------------------------------------- /eval/run_scripts/run_realmetrics_pyramidkv_snapkv_128k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/run_scripts/run_realmetrics_pyramidkv_snapkv_128k.sh -------------------------------------------------------------------------------- /eval/scripts/eval_gpt4_longqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/scripts/eval_gpt4_longqa.py -------------------------------------------------------------------------------- /eval/scripts/eval_gpt4_summ.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/scripts/eval_gpt4_summ.py -------------------------------------------------------------------------------- /eval/scripts/gpt4_eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/scripts/gpt4_eval_utils.py -------------------------------------------------------------------------------- /eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/utils.py -------------------------------------------------------------------------------- /eval/viz/gather_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/viz/gather_results.py -------------------------------------------------------------------------------- /eval/viz/kv_footprint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/viz/kv_footprint.py -------------------------------------------------------------------------------- /eval/viz/plot_kv_footprint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/eval/viz/plot_kv_footprint.py -------------------------------------------------------------------------------- /prulong/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/README.md -------------------------------------------------------------------------------- /prulong/concat_packed_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/concat_packed_datasets.py -------------------------------------------------------------------------------- /prulong/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/requirements.txt -------------------------------------------------------------------------------- /prulong/run_scripts/prulong_masksandweights.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/run_scripts/prulong_masksandweights.sh -------------------------------------------------------------------------------- /prulong/run_scripts/prulong_masksonly.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/run_scripts/prulong_masksonly.sh -------------------------------------------------------------------------------- /prulong/run_scripts/sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/run_scripts/sft.sh -------------------------------------------------------------------------------- /prulong/save_prulong_masks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/save_prulong_masks.py -------------------------------------------------------------------------------- /prulong/scripts/unpack_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/scripts/unpack_dataset.py -------------------------------------------------------------------------------- /prulong/training/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/training/dataset.py -------------------------------------------------------------------------------- /prulong/training/distributed_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/training/distributed_attention.py -------------------------------------------------------------------------------- /prulong/training/l0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/training/l0.py -------------------------------------------------------------------------------- /prulong/training/lh_train_language_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/training/lh_train_language_model.py -------------------------------------------------------------------------------- /prulong/training/lh_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/training/lh_trainer.py -------------------------------------------------------------------------------- /prulong/training/modeling_flash_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-pli/PruLong/HEAD/prulong/training/modeling_flash_llama.py --------------------------------------------------------------------------------