├── .clang-format ├── .gemini └── config.yaml ├── .github └── workflows │ └── pre-commit.yml ├── .gitignore ├── .license-header.txt ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets ├── ads.jpg ├── kvcached-example-thumbnail.png ├── kvcached-example.gif ├── logo-v1.svg ├── logo-v2-icon-only.svg ├── logo-v2-transparent.svg ├── logo-v2.svg ├── ttft_results │ ├── plot_ttft.py │ ├── ttft_mean.svg │ └── ttft_p99.svg ├── uc-colocate.svg ├── uc-compound.svg ├── uc-icons.pptx ├── uc-multillm.svg ├── uc-serverless.svg ├── vmm.svg └── vmm_v2.svg ├── benchmarks ├── bench_kvcached_overhead │ ├── README.md │ ├── start_client.sh │ └── start_server.sh ├── bench_latency_benefit │ ├── bench-config.yaml │ ├── bench_kvcached_vllm.py │ ├── run_benchmark.sh │ └── start_vllm_server.sh ├── bench_map_parallelism │ ├── README.md │ └── kvcached_map_parallel_benchmark.py ├── bench_tp_ipc │ ├── README.md │ ├── broadcast_map_impl │ │ ├── __init__.py │ │ ├── python_async_await.py │ │ ├── sequential_sync.py │ │ └── threadpool.py │ └── kvcached_tp_ipc_benchmark.py ├── bench_vmm │ ├── Makefile │ ├── README.md │ ├── bench_vmm.cpp │ └── cuda_utils.hpp ├── gsm8k │ ├── README.md │ ├── bench_sglang.py │ └── bench_vllm.py └── simple_bench │ ├── env_detect.sh │ ├── start_client.sh │ └── start_server.sh ├── controller ├── README.md ├── __init__.py ├── benchmark.py ├── example-config.yaml ├── frontend.py ├── launch.py ├── router.py ├── sleep_manager.py ├── traffic_monitor.py └── utils.py ├── csrc ├── allocator.cpp ├── ftensor.cpp ├── inc │ ├── allocator.hpp │ ├── constants.hpp │ ├── cuda_utils.hpp │ ├── ftensor.hpp │ ├── impl │ │ └── torch_utils.ipp │ ├── page.hpp │ └── torch_utils.hpp ├── page.cpp └── torch_bindings.cpp ├── docker ├── Dockerfile.dev ├── Dockerfile.sglang ├── Dockerfile.vllm └── README.md ├── engine_integration ├── patches │ ├── kvcached-sglang-v0.4.6.post2.patch │ ├── kvcached-sglang-v0.4.9.patch │ ├── kvcached-vllm-v0.8.4.patch │ └── kvcached-vllm-v0.9.2.patch └── scripts │ ├── check_import_path.py │ ├── setup.sh │ └── setup_b200.sh ├── examples ├── 01_simple_two_models │ ├── README.md │ ├── send_requests.sh │ └── start_two_models.sh ├── 02_memory_control │ └── README.md ├── 03_model_router_sleep │ └── README.md ├── 04_inference_and_finetune │ ├── .gitignore │ ├── README.md │ ├── data │ │ ├── README.md │ │ ├── alpaca_en_demo.json │ │ └── dataset_info.json │ ├── llama3_lora_sft.yaml │ ├── setup.sh │ ├── start_finetune.sh │ ├── start_inference_and_finetune.sh │ ├── start_llm_client.sh │ └── start_llm_server.sh ├── 05_multi_agents │ ├── README.md │ ├── multi_agent_example.py │ ├── setup_langchain.sh │ └── start_multi_agent_models.sh ├── 06_serverless_serving │ └── README.md ├── 07_inference_and_diffusion │ ├── .gitignore │ ├── README.md │ ├── datasets │ │ └── vidprom.txt │ ├── diffusion_serving.py │ ├── setup.sh │ ├── start_diffusion.sh │ ├── start_inference_and_diffusion.sh │ ├── start_llm_client.sh │ └── start_llm_server.sh └── 08_hybrid_attention_models │ ├── README.md │ ├── send_requests.sh │ └── start_two_models.sh ├── kvcached ├── __init__.py ├── autopatch.py ├── cli │ ├── __init__.py │ ├── kvctl.py │ ├── kvtop.py │ └── utils.py ├── integration │ ├── __init__.py │ ├── patch_base.py │ ├── sglang │ │ ├── __init__.py │ │ ├── autopatch.py │ │ ├── interfaces.py │ │ └── patches.py │ ├── version_utils.py │ └── vllm │ │ ├── __init__.py │ │ ├── autopatch.py │ │ ├── interfaces.py │ │ └── patches.py ├── kv_cache_manager.py ├── locks.py ├── mem_info_tracker.py ├── page_allocator.py ├── tp_ipc_util.py └── utils.py ├── kvcached_autopatch.pth ├── pyproject.toml ├── requirements.txt ├── setup.py ├── tests ├── test_kvcache_manager.py ├── test_offline_serving.py ├── test_shm_info_tracker.py ├── test_sleep_manager.py ├── test_traffic_monitor.py └── test_utils.py └── tools ├── addlicense.sh ├── dev_copy_pth.py ├── mypy-strict.sh └── mypy.sh /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM -------------------------------------------------------------------------------- /.gemini/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/.gemini/config.yaml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/.gitignore -------------------------------------------------------------------------------- /.license-header.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/.license-header.txt -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/README.md -------------------------------------------------------------------------------- /assets/ads.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/ads.jpg -------------------------------------------------------------------------------- /assets/kvcached-example-thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/kvcached-example-thumbnail.png -------------------------------------------------------------------------------- /assets/kvcached-example.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/kvcached-example.gif -------------------------------------------------------------------------------- /assets/logo-v1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/logo-v1.svg -------------------------------------------------------------------------------- /assets/logo-v2-icon-only.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/logo-v2-icon-only.svg -------------------------------------------------------------------------------- /assets/logo-v2-transparent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/logo-v2-transparent.svg -------------------------------------------------------------------------------- /assets/logo-v2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/logo-v2.svg -------------------------------------------------------------------------------- /assets/ttft_results/plot_ttft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/ttft_results/plot_ttft.py -------------------------------------------------------------------------------- /assets/ttft_results/ttft_mean.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/ttft_results/ttft_mean.svg -------------------------------------------------------------------------------- /assets/ttft_results/ttft_p99.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/ttft_results/ttft_p99.svg -------------------------------------------------------------------------------- /assets/uc-colocate.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/uc-colocate.svg -------------------------------------------------------------------------------- /assets/uc-compound.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/uc-compound.svg -------------------------------------------------------------------------------- /assets/uc-icons.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/uc-icons.pptx -------------------------------------------------------------------------------- /assets/uc-multillm.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/uc-multillm.svg -------------------------------------------------------------------------------- /assets/uc-serverless.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/uc-serverless.svg -------------------------------------------------------------------------------- /assets/vmm.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/vmm.svg -------------------------------------------------------------------------------- /assets/vmm_v2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/assets/vmm_v2.svg -------------------------------------------------------------------------------- /benchmarks/bench_kvcached_overhead/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_kvcached_overhead/README.md -------------------------------------------------------------------------------- /benchmarks/bench_kvcached_overhead/start_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_kvcached_overhead/start_client.sh -------------------------------------------------------------------------------- /benchmarks/bench_kvcached_overhead/start_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_kvcached_overhead/start_server.sh -------------------------------------------------------------------------------- /benchmarks/bench_latency_benefit/bench-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_latency_benefit/bench-config.yaml -------------------------------------------------------------------------------- /benchmarks/bench_latency_benefit/bench_kvcached_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_latency_benefit/bench_kvcached_vllm.py -------------------------------------------------------------------------------- /benchmarks/bench_latency_benefit/run_benchmark.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_latency_benefit/run_benchmark.sh -------------------------------------------------------------------------------- /benchmarks/bench_latency_benefit/start_vllm_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_latency_benefit/start_vllm_server.sh -------------------------------------------------------------------------------- /benchmarks/bench_map_parallelism/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_map_parallelism/README.md -------------------------------------------------------------------------------- /benchmarks/bench_map_parallelism/kvcached_map_parallel_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_map_parallelism/kvcached_map_parallel_benchmark.py -------------------------------------------------------------------------------- /benchmarks/bench_tp_ipc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_tp_ipc/README.md -------------------------------------------------------------------------------- /benchmarks/bench_tp_ipc/broadcast_map_impl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_tp_ipc/broadcast_map_impl/__init__.py -------------------------------------------------------------------------------- /benchmarks/bench_tp_ipc/broadcast_map_impl/python_async_await.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_tp_ipc/broadcast_map_impl/python_async_await.py -------------------------------------------------------------------------------- /benchmarks/bench_tp_ipc/broadcast_map_impl/sequential_sync.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_tp_ipc/broadcast_map_impl/sequential_sync.py -------------------------------------------------------------------------------- /benchmarks/bench_tp_ipc/broadcast_map_impl/threadpool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_tp_ipc/broadcast_map_impl/threadpool.py -------------------------------------------------------------------------------- /benchmarks/bench_tp_ipc/kvcached_tp_ipc_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_tp_ipc/kvcached_tp_ipc_benchmark.py -------------------------------------------------------------------------------- /benchmarks/bench_vmm/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_vmm/Makefile -------------------------------------------------------------------------------- /benchmarks/bench_vmm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_vmm/README.md -------------------------------------------------------------------------------- /benchmarks/bench_vmm/bench_vmm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_vmm/bench_vmm.cpp -------------------------------------------------------------------------------- /benchmarks/bench_vmm/cuda_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/bench_vmm/cuda_utils.hpp -------------------------------------------------------------------------------- /benchmarks/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/gsm8k/README.md -------------------------------------------------------------------------------- /benchmarks/gsm8k/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/gsm8k/bench_sglang.py -------------------------------------------------------------------------------- /benchmarks/gsm8k/bench_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/gsm8k/bench_vllm.py -------------------------------------------------------------------------------- /benchmarks/simple_bench/env_detect.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/simple_bench/env_detect.sh -------------------------------------------------------------------------------- /benchmarks/simple_bench/start_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/simple_bench/start_client.sh -------------------------------------------------------------------------------- /benchmarks/simple_bench/start_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/benchmarks/simple_bench/start_server.sh -------------------------------------------------------------------------------- /controller/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/README.md -------------------------------------------------------------------------------- /controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/__init__.py -------------------------------------------------------------------------------- /controller/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/benchmark.py -------------------------------------------------------------------------------- /controller/example-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/example-config.yaml -------------------------------------------------------------------------------- /controller/frontend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/frontend.py -------------------------------------------------------------------------------- /controller/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/launch.py -------------------------------------------------------------------------------- /controller/router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/router.py -------------------------------------------------------------------------------- /controller/sleep_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/sleep_manager.py -------------------------------------------------------------------------------- /controller/traffic_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/traffic_monitor.py -------------------------------------------------------------------------------- /controller/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/controller/utils.py -------------------------------------------------------------------------------- /csrc/allocator.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/allocator.cpp -------------------------------------------------------------------------------- /csrc/ftensor.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/ftensor.cpp -------------------------------------------------------------------------------- /csrc/inc/allocator.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/allocator.hpp -------------------------------------------------------------------------------- /csrc/inc/constants.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/constants.hpp -------------------------------------------------------------------------------- /csrc/inc/cuda_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/cuda_utils.hpp -------------------------------------------------------------------------------- /csrc/inc/ftensor.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/ftensor.hpp -------------------------------------------------------------------------------- /csrc/inc/impl/torch_utils.ipp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/impl/torch_utils.ipp -------------------------------------------------------------------------------- /csrc/inc/page.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/page.hpp -------------------------------------------------------------------------------- /csrc/inc/torch_utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/inc/torch_utils.hpp -------------------------------------------------------------------------------- /csrc/page.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/page.cpp -------------------------------------------------------------------------------- /csrc/torch_bindings.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/csrc/torch_bindings.cpp -------------------------------------------------------------------------------- /docker/Dockerfile.dev: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/docker/Dockerfile.dev -------------------------------------------------------------------------------- /docker/Dockerfile.sglang: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/docker/Dockerfile.sglang -------------------------------------------------------------------------------- /docker/Dockerfile.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/docker/Dockerfile.vllm -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/docker/README.md -------------------------------------------------------------------------------- /engine_integration/patches/kvcached-sglang-v0.4.6.post2.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/patches/kvcached-sglang-v0.4.6.post2.patch -------------------------------------------------------------------------------- /engine_integration/patches/kvcached-sglang-v0.4.9.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/patches/kvcached-sglang-v0.4.9.patch -------------------------------------------------------------------------------- /engine_integration/patches/kvcached-vllm-v0.8.4.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/patches/kvcached-vllm-v0.8.4.patch -------------------------------------------------------------------------------- /engine_integration/patches/kvcached-vllm-v0.9.2.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/patches/kvcached-vllm-v0.9.2.patch -------------------------------------------------------------------------------- /engine_integration/scripts/check_import_path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/scripts/check_import_path.py -------------------------------------------------------------------------------- /engine_integration/scripts/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/scripts/setup.sh -------------------------------------------------------------------------------- /engine_integration/scripts/setup_b200.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/engine_integration/scripts/setup_b200.sh -------------------------------------------------------------------------------- /examples/01_simple_two_models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/01_simple_two_models/README.md -------------------------------------------------------------------------------- /examples/01_simple_two_models/send_requests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/01_simple_two_models/send_requests.sh -------------------------------------------------------------------------------- /examples/01_simple_two_models/start_two_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/01_simple_two_models/start_two_models.sh -------------------------------------------------------------------------------- /examples/02_memory_control/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/02_memory_control/README.md -------------------------------------------------------------------------------- /examples/03_model_router_sleep/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/03_model_router_sleep/README.md -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/.gitignore: -------------------------------------------------------------------------------- 1 | ShareGPT_V3_unfiltered_cleaned_split.json 2 | -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/README.md -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/data/README.md -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/data/alpaca_en_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/data/alpaca_en_demo.json -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/data/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/data/dataset_info.json -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/llama3_lora_sft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/llama3_lora_sft.yaml -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/setup.sh -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/start_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/start_finetune.sh -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/start_inference_and_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/start_inference_and_finetune.sh -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/start_llm_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/start_llm_client.sh -------------------------------------------------------------------------------- /examples/04_inference_and_finetune/start_llm_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/04_inference_and_finetune/start_llm_server.sh -------------------------------------------------------------------------------- /examples/05_multi_agents/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/05_multi_agents/README.md -------------------------------------------------------------------------------- /examples/05_multi_agents/multi_agent_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/05_multi_agents/multi_agent_example.py -------------------------------------------------------------------------------- /examples/05_multi_agents/setup_langchain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/05_multi_agents/setup_langchain.sh -------------------------------------------------------------------------------- /examples/05_multi_agents/start_multi_agent_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/05_multi_agents/start_multi_agent_models.sh -------------------------------------------------------------------------------- /examples/06_serverless_serving/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/06_serverless_serving/README.md -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/.gitignore -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/README.md -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/datasets/vidprom.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/datasets/vidprom.txt -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/diffusion_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/diffusion_serving.py -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/setup.sh -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/start_diffusion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/start_diffusion.sh -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/start_inference_and_diffusion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/start_inference_and_diffusion.sh -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/start_llm_client.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/start_llm_client.sh -------------------------------------------------------------------------------- /examples/07_inference_and_diffusion/start_llm_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/07_inference_and_diffusion/start_llm_server.sh -------------------------------------------------------------------------------- /examples/08_hybrid_attention_models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/08_hybrid_attention_models/README.md -------------------------------------------------------------------------------- /examples/08_hybrid_attention_models/send_requests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/08_hybrid_attention_models/send_requests.sh -------------------------------------------------------------------------------- /examples/08_hybrid_attention_models/start_two_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/examples/08_hybrid_attention_models/start_two_models.sh -------------------------------------------------------------------------------- /kvcached/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/__init__.py -------------------------------------------------------------------------------- /kvcached/autopatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/autopatch.py -------------------------------------------------------------------------------- /kvcached/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/cli/__init__.py -------------------------------------------------------------------------------- /kvcached/cli/kvctl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/cli/kvctl.py -------------------------------------------------------------------------------- /kvcached/cli/kvtop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/cli/kvtop.py -------------------------------------------------------------------------------- /kvcached/cli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/cli/utils.py -------------------------------------------------------------------------------- /kvcached/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/__init__.py -------------------------------------------------------------------------------- /kvcached/integration/patch_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/patch_base.py -------------------------------------------------------------------------------- /kvcached/integration/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/sglang/__init__.py -------------------------------------------------------------------------------- /kvcached/integration/sglang/autopatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/sglang/autopatch.py -------------------------------------------------------------------------------- /kvcached/integration/sglang/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/sglang/interfaces.py -------------------------------------------------------------------------------- /kvcached/integration/sglang/patches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/sglang/patches.py -------------------------------------------------------------------------------- /kvcached/integration/version_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/version_utils.py -------------------------------------------------------------------------------- /kvcached/integration/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/vllm/__init__.py -------------------------------------------------------------------------------- /kvcached/integration/vllm/autopatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/vllm/autopatch.py -------------------------------------------------------------------------------- /kvcached/integration/vllm/interfaces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/vllm/interfaces.py -------------------------------------------------------------------------------- /kvcached/integration/vllm/patches.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/integration/vllm/patches.py -------------------------------------------------------------------------------- /kvcached/kv_cache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/kv_cache_manager.py -------------------------------------------------------------------------------- /kvcached/locks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/locks.py -------------------------------------------------------------------------------- /kvcached/mem_info_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/mem_info_tracker.py -------------------------------------------------------------------------------- /kvcached/page_allocator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/page_allocator.py -------------------------------------------------------------------------------- /kvcached/tp_ipc_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/tp_ipc_util.py -------------------------------------------------------------------------------- /kvcached/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached/utils.py -------------------------------------------------------------------------------- /kvcached_autopatch.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/kvcached_autopatch.pth -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/setup.py -------------------------------------------------------------------------------- /tests/test_kvcache_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tests/test_kvcache_manager.py -------------------------------------------------------------------------------- /tests/test_offline_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tests/test_offline_serving.py -------------------------------------------------------------------------------- /tests/test_shm_info_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tests/test_shm_info_tracker.py -------------------------------------------------------------------------------- /tests/test_sleep_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tests/test_sleep_manager.py -------------------------------------------------------------------------------- /tests/test_traffic_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tests/test_traffic_monitor.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /tools/addlicense.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tools/addlicense.sh -------------------------------------------------------------------------------- /tools/dev_copy_pth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tools/dev_copy_pth.py -------------------------------------------------------------------------------- /tools/mypy-strict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tools/mypy-strict.sh -------------------------------------------------------------------------------- /tools/mypy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ovg-project/kvcached/HEAD/tools/mypy.sh --------------------------------------------------------------------------------