├── requirements.txt ├── ucm ├── pd │ └── __init__.py ├── shared │ ├── __init__.py │ ├── trans │ │ ├── __init__.py │ │ ├── simu │ │ │ ├── CMakeLists.txt │ │ │ ├── simu_buffer.h │ │ │ └── simu_device.cc │ │ ├── ascend │ │ │ ├── CMakeLists.txt │ │ │ ├── ascend_buffer.h │ │ │ └── ascend_device.cc │ │ ├── cuda │ │ │ ├── CMakeLists.txt │ │ │ ├── cuda_buffer.h │ │ │ ├── cuda_sm_kernel.h │ │ │ └── cuda_sm_stream.h │ │ ├── CMakeLists.txt │ │ ├── maca │ │ │ └── CMakeLists.txt │ │ ├── device.h │ │ └── buffer.h │ ├── metrics │ │ ├── __init__.py │ │ ├── CMakeLists.txt │ │ ├── cc │ │ │ ├── stats │ │ │ │ └── istats.h │ │ │ ├── stats_registry.h │ │ │ └── stats_registry.cc │ │ ├── test │ │ │ └── test.py │ │ └── cpy │ │ │ └── metrics.py.cc │ ├── CMakeLists.txt │ ├── test │ │ └── CMakeLists.txt │ ├── infra │ │ ├── CMakeLists.txt │ │ ├── time │ │ │ ├── now_time.h │ │ │ └── stopwatch.h │ │ └── template │ │ │ └── singleton.h │ └── vendor │ │ └── CMakeLists.txt ├── sparse │ ├── __init__.py │ ├── blend │ │ └── __init__.py │ ├── esa │ │ ├── __init__.py │ │ ├── retrieval │ │ │ ├── __init__.py │ │ │ └── CMakeLists.txt │ │ └── CMakeLists.txt │ ├── gsa │ │ ├── __init__.py │ │ ├── offload_ops │ │ │ ├── __init__.py │ │ │ ├── include │ │ │ │ ├── thread_safe_queue.h │ │ │ │ └── k_repre.h │ │ │ └── src │ │ │ │ ├── thread_safe_queue.cpp │ │ │ │ ├── pybinds.cpp │ │ │ │ └── k_repre.cpp │ │ ├── prefetch │ │ │ ├── __init__.py │ │ │ └── src │ │ │ │ └── pybinds.cpp │ │ └── CMakeLists.txt │ ├── kvcomp │ │ ├── .gitkeep │ │ ├── __init__.py │ │ ├── hash_retrieval │ │ │ ├── __init__.py │ │ │ └── CMakeLists.txt │ │ ├── figs │ │ │ ├── kvcomp_scheme.jpg │ │ │ ├── kvcomp_longbench.jpg │ │ │ ├── kvcomp_end_to_end_performance.jpg │ │ │ └── kvcomp_single_layer_performance.jpg │ │ ├── paper │ │ │ └── kvcomp-ACL-2025-paper.pdf │ │ ├── configs │ │ │ ├── kvcomp_deepseek_v2_lite_config.json │ │ │ └── kvcomp_qwen3_4B_config.json │ │ └── CMakeLists.txt │ ├── kvstar │ │ ├── .gitkeep │ │ ├── __init__.py │ │ ├── retrieve │ │ 
│ ├── __init__.py │ │ │ ├── core │ │ │ │ ├── domain │ │ │ │ │ └── retrieve_task │ │ │ │ │ │ ├── task_status.h │ │ │ │ │ │ ├── simd_compute_kernel.h │ │ │ │ │ │ ├── computation_task.h │ │ │ │ │ │ ├── retrieve_task_runner.h │ │ │ │ │ │ ├── task_result.h │ │ │ │ │ │ ├── retrieve_task_waiter.h │ │ │ │ │ │ ├── retrieve_task_runner.cpp │ │ │ │ │ │ ├── retrieve_task_manager.h │ │ │ │ │ │ ├── retrieve_task_queue.h │ │ │ │ │ │ ├── retrieve_task_set.h │ │ │ │ │ │ └── retrieve_task.h │ │ │ │ ├── infra │ │ │ │ │ ├── template │ │ │ │ │ │ └── singleton.h │ │ │ │ │ ├── memory │ │ │ │ │ │ ├── memory.cpp │ │ │ │ │ │ └── memory.h │ │ │ │ │ ├── logger │ │ │ │ │ │ ├── logger.h │ │ │ │ │ │ └── logger.cpp │ │ │ │ │ └── thread │ │ │ │ │ │ └── latch.h │ │ │ │ ├── CMakeLists.txt │ │ │ │ └── api │ │ │ │ │ └── kvstar_retrieve │ │ │ │ │ ├── kvstar_retrieve.h │ │ │ │ │ └── kvstar_retrieve.cpp │ │ │ └── py_intf │ │ │ │ └── CMakeLists.txt │ │ └── CMakeLists.txt │ ├── CMakeLists.txt │ ├── utils.py │ └── factory.py ├── store │ ├── __init__.py │ ├── nfsstore │ │ ├── __init__.py │ │ ├── device │ │ │ ├── simu │ │ │ │ └── CMakeLists.txt │ │ │ ├── musa │ │ │ │ └── CMakeLists.txt │ │ │ ├── cuda │ │ │ │ └── CMakeLists.txt │ │ │ ├── ascend │ │ │ │ └── CMakeLists.txt │ │ │ ├── CMakeLists.txt │ │ │ └── maca │ │ │ │ └── CMakeLists.txt │ │ ├── CMakeLists.txt │ │ └── cc │ │ │ └── domain │ │ │ ├── hotness │ │ │ ├── hotness_set.h │ │ │ └── hotness_timer.h │ │ │ ├── space │ │ │ ├── space_shard_temp_layout.h │ │ │ ├── space_property.h │ │ │ ├── space_layout.h │ │ │ └── space_recycle.h │ │ │ └── trans │ │ │ └── trans_manager.h │ ├── pcstore │ │ ├── __init__.py │ │ ├── CMakeLists.txt │ │ └── cc │ │ │ └── domain │ │ │ └── space │ │ │ ├── space_manager.h │ │ │ └── space_layout.h │ ├── mooncakestore │ │ ├── CMakeLists.txt │ │ └── __init__.py │ ├── test │ │ └── CMakeLists.txt │ ├── detail │ │ ├── CMakeLists.txt │ │ └── task │ │ │ ├── task_set.h │ │ │ ├── task_queue.h │ │ │ └── task_waiter.h │ ├── CMakeLists.txt │ └── 
ucmstore.h ├── integration │ ├── __init__.py │ └── vllm │ │ ├── __init__.py │ │ └── patch │ │ ├── __init__.py │ │ └── patch_funcs │ │ ├── __init__.py │ │ └── v092 │ │ └── __init__.py ├── sandbox │ ├── agentic_ai │ │ └── README.md │ └── sparse │ │ └── retake │ │ ├── .gitkeep │ │ ├── .gitignore │ │ ├── misc │ │ └── flexreduc_pipeline.png │ │ ├── requirements.txt │ │ ├── scripts │ │ ├── infer_eval.sh │ │ └── submission │ │ │ ├── prepare_lvbench_submission.py │ │ │ └── prepare_videomme_submission.py │ │ ├── configs │ │ ├── qwen2_vl │ │ │ ├── qwen2-vl_mlvu.yaml │ │ │ ├── qwen2-vl_lvbench.yaml │ │ │ ├── qwen2-vl_videomme.yaml │ │ │ ├── retake_qwen2-vl_videomme.yaml │ │ │ ├── retake_qwen2-vl_mlvu.yaml │ │ │ └── retake_qwen2-vl_lvbench.yaml │ │ ├── llava_video │ │ │ ├── llava-video_mlvu.yaml │ │ │ ├── llava-video_lvbench.yaml │ │ │ ├── llava-video_videomme.yaml │ │ │ ├── retake_llava-video_lvbench.yaml │ │ │ ├── retake_llava-video_mlvu.yaml │ │ │ └── retake_llava-video_videomme.yaml │ │ ├── qwen2_5_vl │ │ │ ├── qwen2-5-vl_videomme_f256.yaml │ │ │ ├── flexreduc_qwen2-5-vl_mlvu.yaml │ │ │ ├── flexreduc_qwen2-5-vl_lvbench.yaml │ │ │ ├── flexreduc_qwen2-5-vl_videomme.yaml │ │ │ └── flexreduc_qwen2-5-vl_longvideobench.yaml │ │ ├── demo.yaml │ │ └── demo_npu.yaml │ │ ├── environment_npu.yaml │ │ └── docs │ │ ├── prepare_videomme.md │ │ ├── prepare_lvbench.md │ │ ├── prepare_longvideobench.md │ │ └── prepare_mlvu.md ├── CMakeLists.txt ├── __init__.py └── logger.py ├── benchmarks └── .gitkeep ├── test ├── CMakeLists.txt ├── common │ ├── __init__.py │ ├── llmperf │ │ ├── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── common_metrics.py │ │ │ └── models.py │ ├── envPreCheck │ │ └── __init__.py │ └── doc │ │ └── LLMPerf.md ├── .gitignore ├── requirements.txt ├── pytest.ini ├── config.yaml └── suites │ └── E2E │ └── test_evaluator.py ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── 100-documentation.yml │ ├── 800-others.yml │ ├── 110-user-story.yml │ ├── 
500-feature-request.yml │ ├── 600-new-model.yml │ ├── 300-usage.yml │ └── 200-installation.yml ├── actionlint.yaml ├── workflows │ ├── matchers │ │ ├── mypy.json │ │ ├── ruff.json │ │ └── actionlint.json │ ├── unifiedcache_test.yml │ ├── pre-commit.yml │ ├── cpp-linter.yml │ ├── e2e_test.yml │ └── ucmstore.yml ├── PULL_REQUEST_TEMPLATE.md └── CODEOWNERS ├── MANIFEST.in ├── docs ├── source │ ├── _static │ │ ├── css │ │ │ └── logo.css │ │ ├── images │ │ │ ├── idea.png │ │ │ ├── GSA_overview.png │ │ │ ├── architecture.png │ │ │ ├── blend_scheme.jpg │ │ │ ├── kvcomp_scheme.jpg │ │ │ ├── kvstar_diagram.png │ │ │ ├── prefix_cache.jpg │ │ │ ├── ucconn_ucmconn.png │ │ │ ├── kvcomp_longbench.jpg │ │ │ ├── kvstar_retrieve.png │ │ │ ├── nfs_performance.png │ │ │ ├── sparse_attn_arch.png │ │ │ ├── attention_overhead.png │ │ │ ├── attention_sparsity.png │ │ │ ├── pd_disaggregation.jpg │ │ │ ├── qrcode_for_wechat.png │ │ │ ├── mooncake_performance.png │ │ │ ├── GSA-E2E-offload-throughput.png │ │ │ ├── esa_async_retrieval_and_load.png │ │ │ ├── kvcomp_end_to_end_performance.jpg │ │ │ ├── GSA-E2E-non-offload-throughput.png │ │ │ └── kvcomp_single_layer_performance.jpg │ │ └── paper │ │ │ └── kvcomp-ACL-2025-paper.pdf │ ├── logos │ │ ├── UCM-dark.png │ │ └── UCM-light.png │ ├── about.md │ ├── developer-guide │ │ └── contribute.md │ └── conf.py ├── start.cmd ├── requirements-docs.txt ├── README.md ├── Makefile └── make.bat ├── requirements-lint.txt ├── docker ├── Dockerfile └── Dockerfile-NPU ├── .readthedocs.yaml ├── .pre-commit-config.yaml ├── pyproject.toml ├── format.sh ├── .gitignore ├── .clang-format ├── LICENSE ├── examples └── ucm_config_example.yaml └── CMakeLists.txt /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/pd/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /benchmarks/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/shared/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/store/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/shared/trans/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/blend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/esa/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /ucm/sparse/gsa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/common/llmperf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/integration/vllm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/shared/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/store/pcstore/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /test/common/envPreCheck/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/common/llmperf/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/integration/vllm/patch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sandbox/agentic_ai/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/esa/retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/gsa/offload_ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/gsa/prefetch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/store/mooncakestore/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/store/mooncakestore/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/hash_retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/integration/vllm/patch/patch_funcs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/integration/vllm/patch/patch_funcs/v092/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(retrieve) 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/.gitignore: -------------------------------------------------------------------------------- 1 | /dataset 2 | /results 3 | */__pycache__ -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CMakeLists.txt 2 | graft ucm 3 | graft examples 4 | graft benchmarks 5 | -------------------------------------------------------------------------------- 
/ucm/sparse/gsa/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(offload_ops) 2 | add_subdirectory(prefetch) 3 | -------------------------------------------------------------------------------- /ucm/store/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(BUILD_UNIT_TESTS) 2 | include(GoogleTest) 3 | endif() 4 | -------------------------------------------------------------------------------- /docs/source/_static/css/logo.css: -------------------------------------------------------------------------------- 1 | .navbar-brand img { 2 | max-width: 180px; 3 | height: auto; 4 | } -------------------------------------------------------------------------------- /docs/source/logos/UCM-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/logos/UCM-dark.png -------------------------------------------------------------------------------- /ucm/sparse/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(esa) 2 | add_subdirectory(gsa) 3 | add_subdirectory(kvcomp) 4 | add_subdirectory(kvstar) 5 | -------------------------------------------------------------------------------- /docs/source/logos/UCM-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/logos/UCM-light.png -------------------------------------------------------------------------------- /docs/source/_static/images/idea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/idea.png 
-------------------------------------------------------------------------------- /.github/actionlint.yaml: -------------------------------------------------------------------------------- 1 | self-hosted-runner: 2 | # Labels of self-hosted runner in array of strings. 3 | labels: 4 | - default 5 | - arc-runner-ucm -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/figs/kvcomp_scheme.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_scheme.jpg -------------------------------------------------------------------------------- /ucm/store/nfsstore/device/simu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(storedevice STATIC simu_device.cc) 2 | target_link_libraries(storedevice PUBLIC infra_status) 3 | -------------------------------------------------------------------------------- /docs/source/_static/images/GSA_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/GSA_overview.png -------------------------------------------------------------------------------- /docs/source/_static/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/architecture.png -------------------------------------------------------------------------------- /docs/source/_static/images/blend_scheme.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/blend_scheme.jpg 
-------------------------------------------------------------------------------- /docs/source/_static/images/kvcomp_scheme.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_scheme.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/kvstar_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvstar_diagram.png -------------------------------------------------------------------------------- /docs/source/_static/images/prefix_cache.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/prefix_cache.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/ucconn_ucmconn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/ucconn_ucmconn.png -------------------------------------------------------------------------------- /ucm/shared/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(vendor) 2 | add_subdirectory(infra) 3 | add_subdirectory(trans) 4 | add_subdirectory(metrics) 5 | add_subdirectory(test) 6 | -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/figs/kvcomp_longbench.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_longbench.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/kvcomp_longbench.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_longbench.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/kvstar_retrieve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvstar_retrieve.png -------------------------------------------------------------------------------- /docs/source/_static/images/nfs_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/nfs_performance.png -------------------------------------------------------------------------------- /docs/source/_static/images/sparse_attn_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/sparse_attn_arch.png -------------------------------------------------------------------------------- /requirements-lint.txt: -------------------------------------------------------------------------------- 1 | # formatting 2 | pre-commit==4.0.1 3 | 4 | # type checking 5 | mypy==1.11.1 6 | types-PyYAML 7 | types-regex 8 | types-requests 9 | types-setuptools -------------------------------------------------------------------------------- /docs/source/_static/images/attention_overhead.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/attention_overhead.png -------------------------------------------------------------------------------- /docs/source/_static/images/attention_sparsity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/attention_sparsity.png -------------------------------------------------------------------------------- /docs/source/_static/images/pd_disaggregation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/pd_disaggregation.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/qrcode_for_wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/qrcode_for_wechat.png -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/paper/kvcomp-ACL-2025-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/paper/kvcomp-ACL-2025-paper.pdf -------------------------------------------------------------------------------- /docs/source/_static/images/mooncake_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/mooncake_performance.png 
-------------------------------------------------------------------------------- /docs/source/_static/paper/kvcomp-ACL-2025-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/paper/kvcomp-ACL-2025-paper.pdf -------------------------------------------------------------------------------- /ucm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(shared) 2 | if(BUILD_UCM_STORE) 3 | add_subdirectory(store) 4 | endif() 5 | if(BUILD_UCM_SPARSE) 6 | add_subdirectory(sparse) 7 | endif() 8 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/misc/flexreduc_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sandbox/sparse/retake/misc/flexreduc_pipeline.png -------------------------------------------------------------------------------- /docs/source/_static/images/GSA-E2E-offload-throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/GSA-E2E-offload-throughput.png -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/figs/kvcomp_end_to_end_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_end_to_end_performance.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/esa_async_retrieval_and_load.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/esa_async_retrieval_and_load.png -------------------------------------------------------------------------------- /docs/source/_static/images/kvcomp_end_to_end_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_end_to_end_performance.jpg -------------------------------------------------------------------------------- /ucm/shared/trans/simu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(trans STATIC 2 | simu_device.cc 3 | simu_buffer.cc 4 | simu_stream.cc 5 | ) 6 | target_link_libraries(trans PUBLIC 7 | fmt 8 | ) 9 | -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/figs/kvcomp_single_layer_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_single_layer_performance.jpg -------------------------------------------------------------------------------- /docs/source/_static/images/GSA-E2E-non-offload-throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/GSA-E2E-non-offload-throughput.png -------------------------------------------------------------------------------- /docs/source/_static/images/kvcomp_single_layer_performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_single_layer_performance.jpg 
-------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | reports/ 2 | dataset/ 3 | logs/ 4 | result_outputs/ 5 | results/ 6 | .cache/ 7 | backup/ 8 | sites/Demo/* 9 | $null 10 | *__pycache__/ 11 | .* 12 | *.log 13 | start.bat 14 | !.gitignore -------------------------------------------------------------------------------- /docs/start.cmd: -------------------------------------------------------------------------------- 1 | pip install -r requirements-docs.txt 2 | start "" /wait cmd /c .\make.bat clean 3 | start "" /wait cmd /c .\make.bat html 4 | start python -m http.server -d build/html/ 5 | start http://localhost:8000 6 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-argparse 3 | sphinx-book-theme 4 | sphinx-copybutton 5 | sphinx-design 6 | sphinx-togglebutton 7 | myst-parser 8 | msgspec 9 | sphinx-substitution-extensions 10 | sphinx-intl -------------------------------------------------------------------------------- /ucm/store/detail/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE UCM_STORE_DETAIL_SOURCE "*.*") 2 | add_library(storedetail OBJECT ${UCM_STORE_DETAIL_SOURCE}) 3 | target_include_directories(storedetail PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 4 | -------------------------------------------------------------------------------- /test/requirements.txt: -------------------------------------------------------------------------------- 1 | #pytest 2 | pytest>=7.0.0 3 | pytest-html>=3.1.1 4 | PyYAML>=6.0 5 | #database 6 | peewee>=3.14.5 7 | psycopg2-binary>=2.8 8 | #llmperf 9 | requests>=2.10.0 10 | pandas>=2.3.0 11 | pydantic>=2.12.0 12 | transformers>=4.0.0 
-------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/task_status.h: -------------------------------------------------------------------------------- 1 | #ifndef KVSTAR_RETRIEVE_CLIB_TASK_STATUS_H 2 | #define KVSTAR_RETRIEVE_CLIB_TASK_STATUS_H 3 | 4 | namespace KVStar { 5 | enum class TaskStatus { PENDING, RUNNING, SUCCESS, FAILURE }; 6 | } 7 | 8 | #endif //KVSTAR_RETRIEVE_CLIB_TASK_STATUS_H 9 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.4.0 2 | torchvision==0.19.0 3 | transformers==4.45.2 4 | flash-attn==2.6.3 5 | accelerate==0.34.2 6 | av==13.1.0 7 | pyyaml==6.0.2 8 | opencv-python-headless==4.10.0.84 9 | pandas==2.2.3 10 | pysubs2==1.7.3 11 | pyarrow==17.0.0 12 | openai==1.56.0 13 | tqdm==4.67.1 -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/scripts/infer_eval.sh: -------------------------------------------------------------------------------- 1 | ckpt_path=$1 2 | config_path=$2 3 | num_gpus=$3 4 | frame_extraction_fps=$4 5 | 6 | PYTHONPATH=$PYTHONPATH:./ python retake/infer_eval.py \ 7 | --hf_qwen2vl7b_path $ckpt_path \ 8 | --config_path $config_path \ 9 | --n_gpus $num_gpus \ 10 | --video_frame_extraction_fps $frame_extraction_fps \ 11 | "${@:5}" -------------------------------------------------------------------------------- /ucm/store/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(detail) 2 | add_library(storeintf INTERFACE) 3 | target_include_directories(storeintf INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) 4 | target_link_libraries(storeintf INTERFACE storedetail infra_status) 5 | add_subdirectory(nfsstore) 6 | add_subdirectory(pcstore) 7 | add_subdirectory(mooncakestore) 8 | 
add_subdirectory(test) 9 | -------------------------------------------------------------------------------- /.github/workflows/matchers/mypy.json: -------------------------------------------------------------------------------- 1 | { 2 | "problemMatcher": [ 3 | { 4 | "owner": "mypy", 5 | "pattern": [ 6 | { 7 | "regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$", 8 | "file": 1, 9 | "line": 2, 10 | "severity": 3, 11 | "message": 4 12 | } 13 | ] 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/simd_compute_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef KVSTAR_RETRIEVE_SIMD_COMPUTE_KERNEL_H 2 | #define KVSTAR_RETRIEVE_SIMD_COMPUTE_KERNEL_H 3 | 4 | #include "retrieve_task.h" 5 | #include "task_result.h" 6 | 7 | namespace KVStar { 8 | 9 | void Execute(const RetrieveTask& task, TaskResult& result); 10 | 11 | } 12 | 13 | 14 | #endif //KVSTAR_RETRIEVE_SIMD_COMPUTE_KERNEL_H -------------------------------------------------------------------------------- /.github/workflows/matchers/ruff.json: -------------------------------------------------------------------------------- 1 | { 2 | "problemMatcher": [ 3 | { 4 | "owner": "ruff", 5 | "pattern": [ 6 | { 7 | "regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$", 8 | "file": 1, 9 | "line": 2, 10 | "column": 3, 11 | "code": 4, 12 | "message": 5 13 | } 14 | ] 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Unified Cache Manager documents 2 | 3 | Live doc: Coming soon 4 | 5 | ## Build the docs 6 | 7 | ```bash 8 | # Install dependencies. 9 | pip install -r requirements-docs.txt 10 | 11 | # Build the docs. 
12 | make clean 13 | make html 14 | 15 | 16 | # Open the docs with your browser 17 | python -m http.server -d build/html/ 18 | ``` 19 | 20 | Launch your browser and open: 21 | - English version: http://localhost:8000 -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/py_intf/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_library( 2 | TORCH_PYTHON_LIB_PATH 3 | torch_python 4 | HINTS ${TORCH_INSTALL_PREFIX}/lib 5 | ) 6 | 7 | pybind11_add_module( 8 | kvstar_retrieve 9 | py_intf.cpp 10 | ) 11 | 12 | target_link_libraries( 13 | kvstar_retrieve 14 | PRIVATE 15 | kvstar_retrieve.core 16 | ${TORCH_PYTHON_LIB_PATH} 17 | ${Torch_LIBRARIES} 18 | ) -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/computation_task.h: -------------------------------------------------------------------------------- 1 | #ifndef KVSTAR_RETRIEVE_CLIB_COMPUTATION_TASK_H 2 | #define KVSTAR_RETRIEVE_CLIB_COMPUTATION_TASK_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace KVStar { 9 | 10 | struct PlainTensor { 11 | void* data = nullptr; 12 | std::vector shape; 13 | std::vector strides; 14 | }; 15 | 16 | 17 | } 18 | 19 | 20 | 21 | #endif -------------------------------------------------------------------------------- /ucm/store/nfsstore/device/musa/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(MUSA_ROOT "/usr/local/musa/" CACHE PATH "Path to MUSA root directory") 2 | add_library(Musa::musart UNKNOWN IMPORTED) 3 | set_target_properties(Musa::musart PROPERTIES 4 | INTERFACE_INCLUDE_DIRECTORIES "${MUSA_ROOT}/include" 5 | IMPORTED_LOCATION "${MUSA_ROOT}/lib/libmusart.so" 6 | ) 7 | 8 | add_library(storedevice STATIC musa_device.cc) 9 | target_link_libraries(storedevice PUBLIC infra_status Musa::musart) 10 | 
-------------------------------------------------------------------------------- /ucm/shared/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(BUILD_UNIT_TESTS) 2 | include(GoogleTest) 3 | file(GLOB_RECURSE UCMSHARED_TEST_SOURCE_FILES "./case/*.cc") 4 | add_executable(ucmshared.test ${UCMSHARED_TEST_SOURCE_FILES}) 5 | target_include_directories(ucmshared.test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/case) 6 | target_link_libraries(ucmshared.test PRIVATE 7 | trans 8 | gtest_main gtest 9 | ) 10 | gtest_discover_tests(ucmshared.test) 11 | endif() 12 | -------------------------------------------------------------------------------- /.github/workflows/unifiedcache_test.yml: -------------------------------------------------------------------------------- 1 | name: 'ucm-lint-and-unittest' 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | - 'dev*' 8 | - '*release' 9 | - 'feature*' 10 | pull_request: 11 | branches: 12 | - 'main' 13 | - 'dev*' 14 | - '*release' 15 | - 'feature*' 16 | 17 | jobs: 18 | # gpu-test: 19 | # uses: ./.github/workflows/e2e_test.yml 20 | 21 | call-lint: 22 | uses: ./.github/workflows/pre-commit.yml 23 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/device/cuda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CUDA_ROOT "/usr/local/cuda/" CACHE PATH "Path to CUDA root directory") 2 | set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/nvcc) 3 | set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90) 4 | enable_language(CUDA) 5 | add_library(storedevice STATIC cuda_device.cu) 6 | target_link_libraries(storedevice PUBLIC infra_status) 7 | target_compile_options(storedevice PRIVATE 8 | --diag-suppress=128 --diag-suppress=2417 --diag-suppress=2597 9 | -Wall -fPIC 10 | ) 11 | -------------------------------------------------------------------------------- /docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | # Set to other image if needed 2 | FROM vllm/vllm-openai:v0.9.2 3 | 4 | ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" 5 | 6 | WORKDIR /workspace 7 | 8 | # Install unified-cache-management 9 | COPY . /workspace/unified-cache-management 10 | 11 | RUN pip config set global.index-url ${PIP_INDEX_URL} 12 | 13 | RUN export PLATFORM="cuda" && \ 14 | pip install -v -e /workspace/unified-cache-management --no-build-isolation 15 | 16 | 17 | ENTRYPOINT ["/bin/bash"] -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_vl/qwen2-vl_mlvu.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | 7 | ### dataset 8 | dataset_name: mlvu 9 | anno_file: dataset/mlvu/mlvu.json 10 | dataloader_num_workers: 2 11 | 12 | ### data 13 | sample_fps: 4 14 | max_num_frames: 256 15 | longsize_resolution: 448 16 | 17 | ### generate 18 | do_sample: false 19 | 20 | ### output 21 | output_dir: results/qwen2vl_7b_mlvu_f256_4fps_r448/base 22 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/device/ascend/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(ASCEND_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "Path to Ascend root directory") 2 | add_library(Ascend::ascendcl UNKNOWN IMPORTED) 3 | set_target_properties(Ascend::ascendcl PROPERTIES 4 | INTERFACE_INCLUDE_DIRECTORIES "${ASCEND_ROOT}/include" 5 | IMPORTED_LOCATION "${ASCEND_ROOT}/lib64/libascendcl.so" 6 | ) 7 | 8 | add_library(storedevice STATIC ascend_device.cc) 9 | target_link_libraries(storedevice PUBLIC infra_status Ascend::ascendcl) 10 | 
-------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/llava_video/llava-video_mlvu.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: llava_video 3 | method: retake 4 | attn_implementation: "flash_attention_2" 5 | 6 | ### dataset 7 | dataset_name: mlvu 8 | anno_file: dataset/mlvu/mlvu.json 9 | dataloader_num_workers: 4 10 | 11 | ### data 12 | sample_fps: 2 13 | max_num_frames: 64 14 | longsize_resolution: 682 # short-side can be 384 15 | 16 | ### generate 17 | do_sample: false 18 | 19 | ### output 20 | output_dir: results/llava-video_mlvu_f64_2fps_r682/base 21 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_5_vl/qwen2-5-vl_videomme_f256.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_5_vl 3 | method: retake 4 | attn_implementation: "flash_attention_2" 5 | 6 | ### dataset 7 | dataset_name: videomme 8 | anno_file: dataset/video_mme/video_mme.json 9 | dataloader_num_workers: 4 10 | 11 | ### data 12 | sample_fps: 2 13 | max_num_frames: 256 14 | longsize_resolution: 448 15 | 16 | ### generate 17 | do_sample: false 18 | 19 | ### output 20 | output_dir: results/qwen2_5_vl_7b_videomme_f256_2fps_r448/base 21 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_vl/qwen2-vl_lvbench.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | 7 | ### dataset 8 | dataset_name: lvbench 9 | anno_file: dataset/lvbench/lvbench.json 10 | dataloader_num_workers: 2 11 | 12 | ### data 13 | sample_fps: 2 14 | max_num_frames: 256 15 | longsize_resolution: 448 16 | 17 | ### generate 18 | 
do_sample: false 19 | 20 | ### output 21 | output_dir: results/qwen2vl_7b_lvbench_f256_2fps_r448/base 22 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/infra/template/singleton.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_SINGLETON_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_SINGLETON_H 3 | 4 | template 5 | class Singleton { 6 | public: 7 | Singleton(const Singleton&) = delete; 8 | Singleton& operator=(const Singleton&) = delete; 9 | static T* Instance() 10 | { 11 | static T t; 12 | return &t; 13 | } 14 | 15 | private: 16 | Singleton() = default; 17 | }; 18 | 19 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_SINGLETON_H -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/llava_video/llava-video_lvbench.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: llava_video 3 | method: retake 4 | attn_implementation: "flash_attention_2" 5 | 6 | ### dataset 7 | dataset_name: lvbench 8 | anno_file: dataset/lvbench/lvbench.json 9 | dataloader_num_workers: 4 10 | 11 | ### data 12 | sample_fps: 2 13 | max_num_frames: 64 14 | longsize_resolution: 682 # short-side can be 384 15 | 16 | ### generate 17 | do_sample: false 18 | 19 | ### output 20 | output_dir: results/llava-video_lvbench_f64_2fps_r682/base 21 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_vl/qwen2-vl_videomme.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | 7 | ### dataset 8 | dataset_name: videomme 9 | anno_file: dataset/video_mme/video_mme.json 10 | dataloader_num_workers: 2 11 | 12 | ### data 13 | sample_fps: 4 14 | 
max_num_frames: 256 15 | longsize_resolution: 448 16 | 17 | ### generate 18 | do_sample: false 19 | 20 | ### output 21 | output_dir: results/qwen2vl_7b_videomme_f256_4fps_r448/base 22 | -------------------------------------------------------------------------------- /.github/workflows/matchers/actionlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "problemMatcher": [ 3 | { 4 | "owner": "actionlint", 5 | "pattern": [ 6 | { 7 | "regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$", 8 | "file": 1, 9 | "line": 2, 10 | "column": 3, 11 | "message": 4, 12 | "code": 5 13 | } 14 | ] 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/llava_video/llava-video_videomme.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: llava_video 3 | method: retake 4 | attn_implementation: "flash_attention_2" 5 | 6 | ### dataset 7 | dataset_name: videomme 8 | anno_file: dataset/video_mme/video_mme.json 9 | dataloader_num_workers: 4 10 | 11 | ### data 12 | sample_fps: 2 13 | max_num_frames: 64 14 | longsize_resolution: 682 # short-side can be 384 15 | 16 | ### generate 17 | do_sample: false 18 | 19 | ### output 20 | output_dir: results/llava-video_video_mme_f64_2fps_r682/base 21 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_runner.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_RUNNER_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_RUNNER_H 3 | 4 | #include "status/status.h" 5 | #include "retrieve_task.h" 6 | #include "task_result.h" 7 | 8 | 9 | 
namespace KVStar { 10 | 11 | class RetrieveTaskRunner { 12 | public: 13 | RetrieveTaskRunner(){} 14 | Status Run(const RetrieveTask& task, TaskResult& result); 15 | }; 16 | 17 | 18 | } 19 | 20 | 21 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_RUNNER_H -------------------------------------------------------------------------------- /ucm/__init__.py: -------------------------------------------------------------------------------- 1 | from ucm.integration.vllm.ucm_connector import UCMConnector 2 | 3 | try: 4 | from ucm.integration.vllm.patch.apply_patch import ensure_patches_applied 5 | 6 | ensure_patches_applied() 7 | except Exception as e: 8 | # Don't fail if patches can't be applied - might be running in environment without vLLM 9 | import warnings 10 | 11 | warnings.warn( 12 | f"Failed to apply vLLM patches: {e}. " 13 | f"If you're using vLLM, ensure it's installed and patches are compatible." 14 | ) 15 | 16 | __all__ = ["UCMConnector"] 17 | -------------------------------------------------------------------------------- /ucm/shared/trans/ascend/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(ASCEND_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "Path to Ascend root directory") 2 | add_library(Ascend::ascendcl UNKNOWN IMPORTED) 3 | set_target_properties(Ascend::ascendcl PROPERTIES 4 | INTERFACE_INCLUDE_DIRECTORIES "${ASCEND_ROOT}/include" 5 | IMPORTED_LOCATION "${ASCEND_ROOT}/lib64/libascendcl.so" 6 | ) 7 | add_library(trans STATIC 8 | ascend_device.cc 9 | ascend_buffer.cc 10 | ascend_stream.cc 11 | ) 12 | target_link_libraries(trans PUBLIC 13 | fmt 14 | Ascend::ascendcl 15 | ) 16 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE CORE_SRC_FILES "*.cpp" "api/*.cpp" "api/**/*.cpp" "domain/*.cpp" 
"domain/**/*.cpp" "infra/*.cpp" "infra/**/*.cpp") 2 | 3 | add_library(kvstar_retrieve.core STATIC ${CORE_SRC_FILES}) 4 | 5 | target_include_directories(kvstar_retrieve.core PUBLIC 6 | "." "api" "domain" "infra" 7 | ${NUMA_INSTALL_DIR}/include 8 | ) 9 | 10 | target_link_libraries(kvstar_retrieve.core PUBLIC 11 | spdlog::spdlog 12 | fmt::fmt 13 | $<$:${NUMA_INSTALL_DIR}/lib/libnuma.so> 14 | ${Torch_LIBRARIES} 15 | ) 16 | -------------------------------------------------------------------------------- /docs/source/about.md: -------------------------------------------------------------------------------- 1 | # About Us 2 | 3 | UCM is rooted in KV Cache, with the goal of reducing inference costs and building commercially viable inference 4 | solutions. It enhances throughput through methods such as Prefix Cache, sparsification, and PD Disaggregation. 5 | 6 | The UCM team consists of a group of "lazy" people who love simple things and also enjoy "borrowing" the excellent 7 | experiences of others. Adhering to the principle of full openness, we hope everyone will generously share their 8 | insights. We also welcome everyone to learn from these experiences together, engage in discussions, and help us make 9 | progress. -------------------------------------------------------------------------------- /docker/Dockerfile-NPU: -------------------------------------------------------------------------------- 1 | # Set to other image if needed 2 | FROM quay.io/ascend/vllm-ascend:v0.9.2rc1 3 | 4 | ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" 5 | 6 | WORKDIR /workspace 7 | 8 | # Install unified-cache-management 9 | COPY . 
/workspace/unified-cache-management 10 | 11 | RUN pip config set global.index-url ${PIP_INDEX_URL} 12 | 13 | RUN export PLATFORM="ascend" && \ 14 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \ 15 | pip install -v -e /workspace/unified-cache-management --no-build-isolation 16 | 17 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/task_result.h: -------------------------------------------------------------------------------- 1 | #ifndef KVSTAR_RETRIEVE_CLIB_TASK_RESULT_H 2 | #define KVSTAR_RETRIEVE_CLIB_TASK_RESULT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "domain/retrieve_task/task_status.h" 10 | 11 | 12 | namespace KVStar { 13 | struct TaskResult { 14 | std::atomic status{TaskStatus::PENDING}; 15 | std::vector topkIndices; 16 | std::string errorMessage; 17 | std::mutex mtx; 18 | TaskResult() = default; 19 | }; 20 | 21 | } // namespace KVStar 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/infra/memory/memory.cpp: -------------------------------------------------------------------------------- 1 | #include "memory.h" 2 | #include 3 | 4 | namespace KVStar { 5 | 6 | std::shared_ptr MakePtr(void *ptr) { 7 | if (!ptr) { return nullptr; } 8 | return std::shared_ptr(ptr, [](void *ptr) { free(ptr); }); 9 | } 10 | 11 | std::shared_ptr Memory::Alloc(const size_t size) { return MakePtr(malloc(size)); } 12 | 13 | std::shared_ptr Memory::AllocAlign(const size_t size) { 14 | void *ptr = nullptr; 15 | auto ret = posix_memalign(&ptr, _alignment, size); 16 | if (ret != 0) { return nullptr; } 17 | return MakePtr(ptr); 18 | } 19 | } -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/environment_npu.yaml: 
-------------------------------------------------------------------------------- 1 | name: retake 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python==3.11 6 | - pip: 7 | - numpy==1.26.4 8 | - scipy==1.14.1 9 | - torch==2.4.0 10 | - torch-npu==2.4.0 11 | - torchvision==0.19.0 12 | - transformers==4.45.2 13 | - accelerate==0.34.2 14 | - av==13.1.0 15 | - pyyaml==6.0.2 16 | - opencv-python-headless==4.10.0.84 17 | - pandas==2.2.3 18 | - pysubs2==1.7.3 19 | - pyarrow==17.0.0 20 | - openai==1.56.0 21 | - tqdm==4.67.1 22 | - attrs==23.2.0 23 | - decorator==5.2.1 24 | -------------------------------------------------------------------------------- /ucm/sparse/esa/retrieval/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Add the build target 2 | pybind11_add_module(retrieval_backend cpy/retrieval_backend.cpp) 3 | 4 | # Set the install directory for the output library 5 | file(RELATIVE_PATH INSTALL_REL_PATH 6 | ${CMAKE_SOURCE_DIR} 7 | ${CMAKE_CURRENT_SOURCE_DIR} 8 | ) 9 | install(TARGETS retrieval_backend LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm) 10 | 11 | # Set the include directories so that numaf.h can be found 12 | target_include_directories(retrieval_backend PUBLIC 13 | ${NUMA_INSTALL_DIR}/include 14 | ${Torch_INCLUDE_DIRS} 15 | ) 16 | 17 | # Link the required libraries 18 | target_link_libraries(retrieval_backend PUBLIC 19 | $<$:${NUMA_INSTALL_DIR}/lib/libnuma.so> 20 | ${Torch_LIBRARIES} 21 | ) 22 | -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/hash_retrieval/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Add the build target 2 | pybind11_add_module(hash_retrieval_backend cpy/hash_retrieval_backend.cpp) 3 | 4 | file(RELATIVE_PATH INSTALL_REL_PATH 5 | ${CMAKE_SOURCE_DIR} 6 | ${CMAKE_CURRENT_SOURCE_DIR} 7 | ) 8 | install(TARGETS hash_retrieval_backend LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm) 9 | 10 | # Set the include directories so that numaf.h can be found 11 | target_include_directories(hash_retrieval_backend PUBLIC 12 |
${NUMA_INSTALL_DIR}/include 13 | ${Torch_INCLUDE_DIRS} 14 | ) 15 | 16 | # Link the required libraries 17 | target_link_libraries(hash_retrieval_backend PUBLIC 18 | $<$:${NUMA_INSTALL_DIR}/lib/libnuma.so> 19 | ${Torch_LIBRARIES} 20 | ) -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = suites 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | 7 | addopts = 8 | -ra 9 | --capture=no 10 | filterwarnings = 11 | ignore::pytest.PytestReturnNotNoneWarning 12 | 13 | log_cli = 1 14 | log_cli_level = INFO 15 | log_cli_format = [%(levelname)s] %(name)s: %(message)s 16 | norecursedirs = .git venv env __pycache__ *.egg 17 | 18 | markers = 19 | # -------- Levels (Required) -------- 20 | stage: Unit/Smoke/Regression/Release (0=Unit 1=Smoke 2=Regression 3=Release) 21 | # -------- Features (Recommended) -------- 22 | feature: Feature tag 23 | platform: Platform tag(gpu/npu) 24 | # end of markers -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version, and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.12" 12 | 13 | # Build documentation in the "docs/" directory with Sphinx 14 | sphinx: 15 | configuration: docs/source/conf.py 16 | 17 | # Optionally, but recommended, 18 | # declare the Python requirements required to build your documentation 19 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements-docs.txt 23 |
-------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/infra/memory/memory.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_MEMORY_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_MEMORY_H 3 | 4 | #include 5 | #include 6 | 7 | namespace KVStar { 8 | 9 | class Memory { 10 | public: 11 | static bool Aligned(const size_t size) { return size % _alignment == 0;} 12 | static size_t Align(const size_t size) { return (size + _alignment - 1) / _alignment * _alignment; } 13 | static std::shared_ptr Alloc(const size_t size); 14 | static std::shared_ptr AllocAlign(const size_t size); 15 | 16 | private: 17 | static constexpr size_t _alignment{4096}; 18 | }; 19 | } 20 | 21 | 22 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_MEMORY_H -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/100-documentation.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation 2 | description: Report an issue related to ucm official website 3 | title: "[Doc]: " 4 | labels: ["documentation"] 5 | 6 | body: 7 | - type: textarea 8 | attributes: 9 | label: 📚 The doc issue 10 | description: > 11 | A clear and concise description of what content in unifiedcache official website is an issue. 12 | validations: 13 | required: true 14 | - type: textarea 15 | attributes: 16 | label: Suggest a potential alternative/fix 17 | description: > 18 | Tell us how we could improve the documentation in this regard. 19 | - type: markdown 20 | attributes: 21 | value: > 22 | Thanks for contributing 🎉! 
23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /ucm/store/pcstore/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE UCMSTORE_PC_CC_SOURCE_FILES "./cc/*.cc") 2 | add_library(pcstore STATIC ${UCMSTORE_PC_CC_SOURCE_FILES}) 3 | target_include_directories(pcstore PUBLIC 4 | ${CMAKE_CURRENT_SOURCE_DIR}/cc/api 5 | ${CMAKE_CURRENT_SOURCE_DIR}/cc/domain 6 | ) 7 | target_link_libraries(pcstore PUBLIC storeintf trans infra_logger) 8 | 9 | file(GLOB_RECURSE UCMSTORE_PC_CPY_SOURCE_FILES "./cpy/*.cc") 10 | pybind11_add_module(ucmpcstore ${UCMSTORE_PC_CPY_SOURCE_FILES}) 11 | target_link_libraries(ucmpcstore PRIVATE pcstore) 12 | 13 | file(RELATIVE_PATH INSTALL_REL_PATH 14 | ${CMAKE_SOURCE_DIR} 15 | ${CMAKE_CURRENT_SOURCE_DIR} 16 | ) 17 | install(TARGETS ucmpcstore LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm) 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/codespell-project/codespell 3 | rev: v2.4.1 4 | hooks: 5 | - id: codespell 6 | args: [ 7 | '--skip', 'ucm/csrc/**,./ucm.egg-info/**,.github/**', 8 | '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn' 9 | ] 10 | - repo: https://github.com/psf/black 11 | rev: 24.4.2 12 | hooks: 13 | - id: black 14 | language_version: python3 15 | - repo: https://github.com/PyCQA/isort 16 | rev: 6.0.1 17 | hooks: 18 | - id: isort 19 | args: 20 | - "--profile=black" 21 | - repo: https://github.com/rhysd/actionlint 22 | rev: v1.7.7 23 | hooks: 24 | - id: actionlint 25 | default_stages: 26 | - pre-commit 27 | - manual -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=64", 4 | "cmake>=3.18", 5 | "wheel", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | [project] 10 | name = "uc-manager" 11 | authors = [{name = "Unified Cache Team"}] 12 | license = { file="LICENSE" } 13 | readme = "README.md" 14 | description = "Persist and reuse KV Cache to speedup your LLM." 
15 | requires-python = ">=3.10" 16 | dynamic = [ "version", "dependencies", "optional-dependencies"] 17 | 18 | [project.urls] 19 | Homepage="https://github.com/ModelEngine-Group/unified-cache-management" 20 | Documentation="https://ucm.readthedocs.io/en/latest" 21 | WeChat="https://github.com/ModelEngine-Group/unified-cache-management/blob/develop/docs/source/_static/images/qrcode_for_wechat.png?raw=true" 22 | -------------------------------------------------------------------------------- /test/common/llmperf/utils/common_metrics.py: -------------------------------------------------------------------------------- 1 | # TODO (Avnishn): compute metrics in class 2 | INTER_TOKEN_LAT = "inter_token_latency_s" 3 | TTFT = "ttft_s" 4 | E2E_LAT = "end_to_end_latency_s" 5 | NUM_INPUT_TOKENS = "number_input_tokens" 6 | NUM_OUTPUT_TOKENS = "number_output_tokens" 7 | NUM_TOTAL_TOKENS = "number_total_tokens" 8 | REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s" 9 | ERROR_MSG = "error_msg" 10 | ERROR_CODE = "error_code" 11 | ERROR_CODE_FREQ = "error_code_frequency" 12 | NUM_ERRORS = "number_errors" 13 | OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s" 14 | NUM_COMPLETED_REQUESTS = "num_completed_requests" 15 | COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min" 16 | ERROR_RATE = "error_rate" 17 | NUM_REQ_STARTED = "num_requests_started" 18 | -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | check_command() { 5 | if ! 
command -v "$1" &> /dev/null; then 6 | echo "❓❓$1 is not installed, please run:" 7 | echo "# Install lint deps" 8 | echo "pip install -r requirements-lint.txt" 9 | echo "# (optional) Enable git commit pre check" 10 | echo "pre-commit install" 11 | echo "" 12 | echo "See step by step contribution guide:" 13 | echo "Unifiedcache Official Website" 14 | exit 1 15 | fi 16 | } 17 | 18 | check_command pre-commit 19 | 20 | # TODO: cleanup SC exclude 21 | export SHELLCHECK_OPTS="--exclude=SC2046,SC2006,SC2086" 22 | if [[ "$1" != 'ci' ]]; then 23 | pre-commit run --all-files 24 | else 25 | pre-commit run --all-files --hook-stage manual 26 | fi -------------------------------------------------------------------------------- /ucm/shared/trans/cuda/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CUDA_ROOT "/usr/local/cuda/" CACHE PATH "Path to CUDA root directory") 2 | set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/nvcc) 3 | set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90) 4 | enable_language(CUDA) 5 | add_library(kernel OBJECT cuda_sm_kernel.cu) 6 | target_compile_options(kernel PRIVATE 7 | --diag-suppress=128 --diag-suppress=2417 --diag-suppress=2597 8 | -Wall -fPIC 9 | ) 10 | add_library(trans STATIC 11 | cuda_device.cc 12 | cuda_buffer.cc 13 | cuda_stream.cc 14 | cuda_sm_stream.cc 15 | ) 16 | target_include_directories(trans PUBLIC ${CUDA_ROOT}/include) 17 | target_link_directories(trans PUBLIC ${CUDA_ROOT}/lib64) 18 | target_link_libraries(trans PUBLIC 19 | fmt 20 | cudart 21 | nvidia-ml 22 | kernel 23 | ) 24 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(device) 2 | file(GLOB_RECURSE UCMSTORE_NFS_CC_SOURCE_FILES "./cc/*.cc") 3 | add_library(nfsstore STATIC ${UCMSTORE_NFS_CC_SOURCE_FILES}) 4 | target_include_directories(nfsstore PUBLIC 5 | 
${CMAKE_CURRENT_SOURCE_DIR} 6 | ${CMAKE_CURRENT_SOURCE_DIR}/cc/api 7 | ${CMAKE_CURRENT_SOURCE_DIR}/cc/domain 8 | ) 9 | target_link_libraries(nfsstore PUBLIC storeintf storedevice infra_logger) 10 | 11 | file(GLOB_RECURSE UCMSTORE_NFS_CPY_SOURCE_FILES "./cpy/*.cc") 12 | pybind11_add_module(ucmnfsstore ${UCMSTORE_NFS_CPY_SOURCE_FILES}) 13 | target_link_libraries(ucmnfsstore PRIVATE nfsstore) 14 | 15 | file(RELATIVE_PATH INSTALL_REL_PATH 16 | ${CMAKE_SOURCE_DIR} 17 | ${CMAKE_CURRENT_SOURCE_DIR} 18 | ) 19 | install(TARGETS ucmnfsstore LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm) 20 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/demo.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | method: retake 3 | scaling_factor: 4 4 | attn_implementation: "flash_attention_2" 5 | longvideo_kwargs: { 6 | 'frame_chunk_size': 64, 7 | 'chunked_prefill_frames': 32, 8 | # KVCache compression 9 | 'kvcache_compression': True, 10 | 'kvcache_compression_kwargs': { 11 | 'compression_method': 'stdvidlkv', 12 | 'dynamic_compression_ratio': True, 13 | 'prompt_guided_compression': True, 14 | 'pos_embed_reforge': False, 15 | 'max_input_length': 16000, 16 | # Temporal 17 | 'enable_temporal_adaptation': True, 18 | 'temporal_adaptation_ratio': 4, 19 | # Layer 20 | 'budget_allocation_method': 'adakv', 21 | }, 22 | } 23 | 24 | ### data 25 | sample_fps: 4 26 | max_num_frames: 2048 27 | longsize_resolution: 448 28 | 29 | ### generate 30 | do_sample: false -------------------------------------------------------------------------------- /ucm/store/nfsstore/device/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(RUNTIME_ENVIRONMENT STREQUAL "ascend") 2 | add_subdirectory(ascend) 3 | elseif(RUNTIME_ENVIRONMENT STREQUAL "musa") 4 | add_subdirectory(musa) 5 | elseif(RUNTIME_ENVIRONMENT STREQUAL "maca") 6 | 
add_subdirectory(maca) 7 | elseif(RUNTIME_ENVIRONMENT STREQUAL "cuda") 8 | add_subdirectory(cuda) 9 | elseif(RUNTIME_ENVIRONMENT STREQUAL "simu") 10 | add_subdirectory(simu) 11 | else() 12 | message(FATAL_ERROR "RUNTIME_ENVIRONMENT must be one of: ascend, musa, maca, cuda, simu. Current value: ${RUNTIME_ENVIRONMENT}") 13 | endif() 14 | 15 | if(TARGET storedevice) 16 | target_include_directories(storedevice PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 17 | else() 18 | message(FATAL_ERROR "storedevice target was not created. Check RUNTIME_ENVIRONMENT setting and subdirectory CMakeLists.txt files.") 19 | endif() 20 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/device/maca/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CUDA_ROOT "/opt/maca/tools/cu-bridge" CACHE PATH "Path to WCUDA root directory") 2 | set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/cucc) 3 | list(APPEND CMAKE_MODULE_PATH "${CUDA_ROOT}/cmake_module/maca") 4 | set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90) 5 | enable_language(CUDA) 6 | 7 | add_library(storedevice STATIC maca_device.cu) 8 | 9 | add_library(WCUDA::cudart UNKNOWN IMPORTED) 10 | set_target_properties(WCUDA::cudart PROPERTIES 11 | INTERFACE_INCLUDE_DIRECTORIES "${CUDA_ROOT}/include" 12 | IMPORTED_LOCATION "${CUDA_ROOT}/lib/libcuda.so" 13 | ) 14 | target_include_directories(WCUDA::cudart INTERFACE 15 | /opt/maca/include 16 | /opt/maca/include/mcr 17 | ) 18 | 19 | target_link_libraries(storedevice PUBLIC infra_status WCUDA::cudart) 20 | target_compile_options(storedevice PRIVATE -Wall -fPIC -std=c++17) 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Linker files 15 
| *.ilk 16 | 17 | # Debugger Files 18 | *.pdb 19 | 20 | # Compiled Dynamic libraries 21 | *.so 22 | *.dylib 23 | *.dll 24 | 25 | # Fortran module files 26 | *.mod 27 | *.smod 28 | 29 | # Compiled Static libraries 30 | *.lai 31 | *.la 32 | *.a 33 | *.lib 34 | 35 | # Executables 36 | *.exe 37 | *.out 38 | *.app 39 | 40 | # Debug information files 41 | *.dwo 42 | 43 | # Development environment files 44 | *.code-workspace 45 | .vscode/** 46 | .idea/** 47 | .git/** 48 | **/build/** 49 | **/output/** 50 | .venv/** 51 | **/__pycache__/** 52 | *.egg-info/** 53 | reports/ 54 | dataset/ 55 | logs/ 56 | .* 57 | *.log 58 | result_outputs/ 59 | results/ 60 | .cache/ 61 | backup/ 62 | $null 63 | *__pycache__/ -------------------------------------------------------------------------------- /ucm/shared/trans/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(RUNTIME_ENVIRONMENT STREQUAL "ascend") 2 | add_subdirectory(ascend) 3 | endif() 4 | if(RUNTIME_ENVIRONMENT STREQUAL "maca") 5 | add_subdirectory(maca) 6 | endif() 7 | if(RUNTIME_ENVIRONMENT STREQUAL "cuda") 8 | add_subdirectory(cuda) 9 | endif() 10 | if(RUNTIME_ENVIRONMENT STREQUAL "simu") 11 | add_subdirectory(simu) 12 | endif() 13 | target_include_directories(trans PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/..) 
14 | target_link_libraries(trans PUBLIC infra_status) 15 | 16 | file(GLOB_RECURSE UCMTRANS_CPY_SOURCE_FILES "./cpy/*.cc") 17 | pybind11_add_module(ucmtrans ${UCMTRANS_CPY_SOURCE_FILES}) 18 | target_link_libraries(ucmtrans PRIVATE trans) 19 | 20 | file(RELATIVE_PATH INSTALL_REL_PATH 21 | ${CMAKE_SOURCE_DIR} 22 | ${CMAKE_CURRENT_SOURCE_DIR} 23 | ) 24 | install(TARGETS ucmtrans LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm) -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/800-others.yml: -------------------------------------------------------------------------------- 1 | name: 🎲 Others 2 | description: Submit a discussion as you like. Note that developers are heavily overloaded and we mainly rely on community users to answer these issues. 3 | title: "[Misc]: " 4 | labels: ["misc"] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: > 10 | #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc). 11 | - type: textarea 12 | attributes: 13 | label: Anything you want to discuss about ucm. 14 | description: > 15 | Anything you want to discuss about unifiedcache. 16 | validations: 17 | required: true 18 | - type: markdown 19 | attributes: 20 | value: > 21 | Thanks for contributing 🎉! 22 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/infra/logger/logger.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_LOGGER_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_LOGGER_H 3 | 4 | #include 5 | 6 | namespace KVStar { 7 | 8 | class Logger { 9 | public: 10 | static std::shared_ptr Make(); 11 | }; 12 | 13 | } 14 | 15 | #define KVSTAR_LOG(level, ...) 
\ 16 | KVStar::Logger::Make()->log(spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, level, __VA_ARGS__) 17 | #define KVSTAR_DEBUG(...) KVSTAR_LOG(spdlog::level::debug, __VA_ARGS__) 18 | #define KVSTAR_INFO(...) KVSTAR_LOG(spdlog::level::info, __VA_ARGS__) 19 | #define KVSTAR_WARN(...) KVSTAR_LOG(spdlog::level::warn, __VA_ARGS__) 20 | #define KVSTAR_ERROR(...) KVSTAR_LOG(spdlog::level::err, __VA_ARGS__) 21 | 22 | 23 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_LOGGER_H -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/demo_npu.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | method: retake 3 | scaling_factor: 4 4 | # attn_implementation: "sdpa" 5 | attn_implementation: "eager" # If your NPU does not support sdpa attention 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 16, 8 | 'chunked_prefill_frames': 16, 9 | # KVCache compression 10 | 'kvcache_compression': True, 11 | 'kvcache_compression_kwargs': { 12 | 'compression_method': 'stdvidlkv', 13 | 'dynamic_compression_ratio': True, 14 | 'prompt_guided_compression': True, 15 | 'pos_embed_reforge': False, 16 | 'max_input_length': 16000, 17 | # Temporal 18 | 'enable_temporal_adaptation': True, 19 | 'temporal_adaptation_ratio': 4, 20 | # Layer 21 | 'budget_allocation_method': 'adakv', 22 | }, 23 | } 24 | 25 | ### data 26 | sample_fps: 4 27 | max_num_frames: 2048 28 | longsize_resolution: 448 29 | 30 | ### generate 31 | do_sample: false -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/infra/thread/latch.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_LATCH_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_LATCH_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace KVStar { 9 | class Latch { 10 | public: 11 | explicit Latch(const size_t expected = 0) : 
_counter{expected} {} 12 | void Up() { ++this->_counter; } 13 | size_t Done() { return --this->_counter; } 14 | void Notify() { /* Take and release _mutex before notifying: Done() updates the counter outside the lock, so without this a thread in Wait() that has just seen a non-zero counter could miss the wakeup and block forever. */ { std::lock_guard<std::mutex> lk(this->_mutex); } this->_cv.notify_all(); } 15 | void Wait() 16 | { 17 | std::unique_lock lk(this->_mutex); 18 | if (this->_counter == 0) { return; } 19 | this->_cv.wait(lk, [this] { return this->_counter == 0; }); 20 | } 21 | 22 | private: 23 | std::mutex _mutex; 24 | std::condition_variable _cv; 25 | std::atomic _counter; 26 | }; 27 | 28 | } 29 | 30 | 31 | 32 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_LATCH_H -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_vl/retake_qwen2-vl_videomme.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 128, 8 | 'chunked_prefill_frames': 32, 9 | # KVCache compression 10 | 'kvcache_compression': True, 11 | 'kvcache_compression_kwargs': { 12 | 'dynamic_compression_ratio': True, 13 | 'compression_method': 'pivotkv', 14 | 'pos_embed_reforge': True, 15 | 'max_input_length': 32000 16 | }, 17 | } 18 | 19 | 20 | ### dataset 21 | dataset_name: videomme 22 | anno_file: dataset/video_mme/video_mme.json 23 | dataloader_num_workers: 2 24 | 25 | ### data 26 | sample_fps: 4 27 | max_num_frames: 2048 28 | longsize_resolution: 448 29 | 30 | ### generate 31 | do_sample: false 32 | 33 | ### output 34 | output_dir: results/qwen2vl_7b_video_mme_f2048_4fps_r448/retake_pivot-32k 35 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | workflow_call: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | pre-commit: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.12" 20 | 21 | - name: Add matchers for 
better error display 22 | run: | 23 | echo "::add-matcher::.github/workflows/matchers/actionlint.json" 24 | echo "::add-matcher::.github/workflows/matchers/mypy.json" 25 | 26 | - name: Run pre-commit checks on all files 27 | uses: pre-commit/action@v3.0.1 28 | env: 29 | SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint 30 | with: 31 | extra_args: --all-files --hook-stage manual 32 | -------------------------------------------------------------------------------- /docs/source/developer-guide/contribute.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | ## Building and testing 3 | It’s recommended to set up a local development environment to build and test before you submit a PR. 4 | ### Run lint locally 5 | Run the following commands to format your code before submitting: 6 | ```bash 7 | # Choose a base dir (~/vllm-project/) and set up venv 8 | cd ~/vllm-project/ 9 | python3 -m venv .venv 10 | source ./.venv/bin/activate 11 | 12 | # Clone UCM and install 13 | git clone https://github.com/ModelEngine-Group/unified-cache-management.git 14 | cd unified-cache-management 15 | 16 | # Install lint requirements (optionally run `pre-commit install` afterwards to enable the git commit hook) 17 | pip install -r requirements-lint.txt 18 | 19 | # Run lint (the first run downloads the pre-commit hook dependencies, so network access, possibly via a proxy, is required) 20 | bash format.sh 21 | ``` 22 | ### Run unit test locally 23 | Run the unit tests locally with the following command: 24 | ```bash 25 | python3 -m unittest discover -s test 26 | ``` 27 | -------------------------------------------------------------------------------- /test/common/llmperf/utils/models.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, Tuple 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class RequestConfig(BaseModel): 7 | """The configuration for a request to the LLM API. 8 | 9 | Args: 10 | model: The model to use. 
11 | prompt: The prompt to provide to the LLM API. 12 | sampling_params: Additional sampling parameters to send with the request. 13 | For more information see the Router app's documentation for the completions 14 | llm_api: The name of the LLM API to send the request to. 15 | metadata: Additional metadata to attach to the request for logging or validation purposes. 16 | """ 17 | 18 | model: str 19 | prompt: Tuple[str, int] 20 | sampling_params: Optional[Dict[str, Any]] = None 21 | llm_api: Optional[str] = None 22 | metadata: Optional[Dict[str, Any]] = None 23 | openai_api_base: Optional[str] = "" 24 | -------------------------------------------------------------------------------- /.github/workflows/cpp-linter.yml: -------------------------------------------------------------------------------- 1 | name: cpp-linter 2 | 3 | on: 4 | push: 5 | branches: [ "*" ] 6 | pull_request: 7 | branches: [ "dev*", "main", "*release", "feature*" ] 8 | 9 | 10 | jobs: 11 | cpp-linter: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 15 | with: 16 | persist-credentials: false 17 | - uses: cpp-linter/cpp-linter-action@main 18 | id: linter 19 | continue-on-error: true 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | with: 23 | style: file 24 | tidy-checks: '-*' 25 | files-changed-only: true 26 | lines-changed-only: diff 27 | format-review: true 28 | version: 20 29 | 30 | - name: Fail fast?! 31 | if: steps.linter.outputs.checks-failed != 0 32 | run: | 33 | echo "some linter checks failed. 
${{ steps.linter.outputs.checks-failed }}" 34 | exit 1 35 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_waiter.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_WAITER_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_WAITER_H 3 | 4 | #include 5 | #include 6 | #include "logger/logger.h" 7 | #include "thread/latch.h" 8 | 9 | namespace KVStar { 10 | 11 | class RetrieveTaskWaiter : public Latch { 12 | public: 13 | RetrieveTaskWaiter(const size_t taskId, const size_t waitCounter) 14 | : Latch{waitCounter}, _taskId{taskId}, _waitCounter{waitCounter} 15 | { 16 | } 17 | 18 | void Done() 19 | { 20 | if (Latch::Done() == 0) { 21 | KVSTAR_DEBUG("Task({}, {}) finished, elapsed {:.06f}s", this->_taskId, this->_waitCounter, this->_sw.elapsed().count()); 22 | this->Notify(); 23 | } 24 | } 25 | 26 | private: 27 | size_t _taskId; 28 | size_t _waitCounter; 29 | spdlog::stopwatch _sw; 30 | 31 | }; 32 | 33 | } 34 | 35 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_WAITER_H 36 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IndentWidth: 4 3 | ColumnLimit: 100 4 | AccessModifierOffset: -4 5 | AlwaysBreakTemplateDeclarations: true 6 | PointerAlignment: Left 7 | AlignArrayOfStructures: Left 8 | AllowShortBlocksOnASingleLine: true 9 | AllowShortCaseLabelsOnASingleLine: true 10 | AllowShortFunctionsOnASingleLine: All 11 | AllowShortIfStatementsOnASingleLine: true 12 | AllowShortLoopsOnASingleLine: true 13 | IncludeBlocks: Merge 14 | IncludeCategories: 15 | - Regex: '<.*>' 16 | Priority: 2 17 | - Regex: '.*' 18 | Priority: 3 19 | BreakBeforeBraces: Custom 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 
| AfterEnum: false 24 | AfterFunction: true 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | AfterExternBlock: false 30 | BeforeCatch: false 31 | BeforeElse: false 32 | IndentBraces: false 33 | SplitEmptyFunction: true 34 | SplitEmptyRecord: true 35 | SplitEmptyNamespace: true 36 | -------------------------------------------------------------------------------- /ucm/shared/metrics/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE CORE_SRCS CONFIGURE_DEPENDS 2 | "${CMAKE_CURRENT_SOURCE_DIR}/cc/stats/*.cc" 3 | "${CMAKE_CURRENT_SOURCE_DIR}/cc/*.cc") 4 | add_library(monitor_static STATIC ${CORE_SRCS}) 5 | set_property(TARGET monitor_static PROPERTY POSITION_INDEPENDENT_CODE ON) 6 | target_include_directories(monitor_static PUBLIC 7 | $ 8 | $) 9 | set_target_properties(monitor_static PROPERTIES OUTPUT_NAME monitor) 10 | 11 | file(GLOB_RECURSE BINDINGS_SRCS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/cpy/*.cc") 12 | pybind11_add_module(ucmmonitor ${BINDINGS_SRCS}) 13 | target_link_libraries(ucmmonitor PRIVATE -Wl,--whole-archive monitor_static -Wl,--no-whole-archive) 14 | target_include_directories(ucmmonitor PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cc) 15 | 16 | file(RELATIVE_PATH INSTALL_REL_PATH 17 | ${CMAKE_SOURCE_DIR} 18 | ${CMAKE_CURRENT_SOURCE_DIR} 19 | ) 20 | install(TARGETS ucmmonitor LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm) -------------------------------------------------------------------------------- /ucm/sparse/gsa/offload_ops/include/thread_safe_queue.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SAFE_QUEUE_H 2 | #define THREAD_SAFE_QUEUE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | struct CopyInfo { 12 | bool needCalKpre; 13 | uint32_t layerId; 14 | std::vector locations; 15 | torch::Tensor ids; 16 | 
torch::Tensor srcTensor; 17 | }; 18 | 19 | class ThreadSafeQueue { 20 | public: 21 | ThreadSafeQueue(); 22 | ~ThreadSafeQueue() = default; 23 | 24 | ThreadSafeQueue(const ThreadSafeQueue&) = delete; 25 | ThreadSafeQueue& operator=(const ThreadSafeQueue&) = delete; 26 | 27 | void push(CopyInfo value); 28 | CopyInfo pop(); 29 | size_t size() const; 30 | bool empty() const; 31 | void stop(); 32 | void clear(); 33 | 34 | private: 35 | mutable std::mutex m_mutex; 36 | std::condition_variable m_condVar; 37 | std::queue m_queue; 38 | std::atomic m_stopped; 39 | }; 40 | 41 | #endif -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/infra/logger/logger.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "logger.h" 5 | 6 | namespace KVStar { 7 | 8 | static std::mutex g_mutex; 9 | static std::shared_ptr g_logger = nullptr; 10 | 11 | std::shared_ptr Logger::Make() 12 | { 13 | if (g_logger) { return g_logger; } 14 | std::unique_lock lock(g_mutex); 15 | if (g_logger) { return g_logger; } 16 | try { 17 | const std::string name = "KVSTAR_RETRIEVE"; 18 | const std::string envLevel = name + "_LOGGER_LEVEL"; 19 | g_logger = spdlog::stdout_color_mt(name); 20 | g_logger->set_pattern("[%Y-%m-%d %H:%M:%S.%f %z] [%n] [%^%L%$] %v [PID: %P, TID: %t] [%s:%#,%!]"); 21 | auto level = spdlog::details::os::getenv(envLevel.c_str()); 22 | if (!level.empty()) { spdlog::cfg::helpers::load_levels(level); } 23 | return g_logger; 24 | } catch (...) 
{ 25 | return spdlog::default_logger(); 26 | } 27 | } 28 | 29 | } -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/api/kvstar_retrieve/kvstar_retrieve.h: -------------------------------------------------------------------------------- 1 | #ifndef KVSTAR_RETRIEVE_CLIB_KVSTAR_RETRIEVE_H 2 | #define KVSTAR_RETRIEVE_CLIB_KVSTAR_RETRIEVE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include // for std::iota 8 | #include "retrieve_task/retrieve_task.h" 9 | #include "retrieve_task/retrieve_task_manager.h" 10 | #include "template/singleton.h" 11 | 12 | namespace KVStar { 13 | 14 | struct SetupParam { 15 | std::vector cpuNumaIds; 16 | std::vector> bindInfo; // coreId, numaId 17 | DeviceType deviceType; 18 | int totalTpSize; 19 | int localRankId; 20 | int threadNum; 21 | 22 | SetupParam(const std::vector& cpuNumaIds, const std::vector>& bindInfo, 23 | const DeviceType deviceType, const int totalTpSize, const int localRankId); 24 | 25 | }; 26 | 27 | int32_t Setup(const SetupParam& param); 28 | 29 | int32_t Wait(const size_t taskId); 30 | 31 | 32 | } // namespace KVStar 33 | 34 | 35 | 36 | #endif //KVSTAR_RETRIEVE_CLIB_KVSTAR_RETRIEVE_H 37 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_mlvu.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_5_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'chunked_prefill_frames': 32, 8 | 'frame_chunk_size': 64, 9 | # KVCache compression 10 | 'kvcache_compression': True, 11 | 'kvcache_compression_kwargs': { 12 | 'compression_method': 'stdvidlkv', 13 | 'dynamic_compression_ratio': True, 14 | 'prompt_guided_compression': True, 15 | 'pos_embed_reforge': False, 16 | 'max_input_length': 16000, 17 | # Temporal 18 | 
'enable_temporal_adaptation': True, 19 | 'temporal_adaptation_ratio': 4, 20 | # Layer 21 | 'budget_allocation_method': 'adakv', 22 | }, 23 | } 24 | 25 | 26 | ### dataset 27 | dataset_name: mlvu 28 | anno_file: dataset/mlvu/mlvu.json 29 | dataloader_num_workers: 4 30 | 31 | ### data 32 | sample_fps: 2 33 | max_num_frames: 2048 34 | longsize_resolution: 448 35 | 36 | ### generate 37 | do_sample: false 38 | 39 | ### output 40 | output_dir: results/qwen25vl_7b_mlvu_f2048_2fps_r448/adaretake-16k 41 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_lvbench.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_5_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'chunked_prefill_frames': 32, 8 | 'frame_chunk_size': 64, 9 | # KVCache compression 10 | 'kvcache_compression': True, 11 | 'kvcache_compression_kwargs': { 12 | 'compression_method': 'stdvidlkv', 13 | 'dynamic_compression_ratio': True, 14 | 'prompt_guided_compression': True, 15 | 'pos_embed_reforge': True, 16 | 'max_input_length': 16000, 17 | # Temporal 18 | 'enable_temporal_adaptation': True, 19 | 'temporal_adaptation_ratio': 4, 20 | # Layer 21 | 'budget_allocation_method': 'adakv', 22 | }, 23 | } 24 | 25 | 26 | ### dataset 27 | dataset_name: lvbench 28 | anno_file: dataset/lvbench/lvbench.json 29 | dataloader_num_workers: 4 30 | 31 | ### data 32 | sample_fps: 2 33 | max_num_frames: 2048 34 | longsize_resolution: 448 35 | 36 | ### generate 37 | do_sample: false 38 | 39 | ### output 40 | output_dir: results/qwen25vl_7b_lvbench_f2048_2fps_r448/adaretake-16k_reforge 41 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_videomme.yaml: 
-------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_5_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'chunked_prefill_frames': 32, 8 | 'frame_chunk_size': 64, 9 | # KVCache compression 10 | 'kvcache_compression': True, 11 | 'kvcache_compression_kwargs': { 12 | 'compression_method': 'stdvidlkv', 13 | 'dynamic_compression_ratio': True, 14 | 'prompt_guided_compression': True, 15 | 'pos_embed_reforge': True, 16 | 'max_input_length': 16000, 17 | # Temporal 18 | 'enable_temporal_adaptation': True, 19 | 'temporal_adaptation_ratio': 4, 20 | # Layer 21 | 'budget_allocation_method': 'adakv', 22 | }, 23 | } 24 | 25 | 26 | ### dataset 27 | dataset_name: videomme 28 | anno_file: dataset/video_mme/video_mme.json 29 | dataloader_num_workers: 4 30 | 31 | ### data 32 | sample_fps: 4 33 | max_num_frames: 2048 34 | longsize_resolution: 448 35 | 36 | ### generate 37 | do_sample: false 38 | 39 | ### output 40 | output_dir: results/qwen25vl_7b_videomme_f2048_4fps_r448/adaretake-16k_reforge 41 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_vl/retake_qwen2-vl_mlvu.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 128, 8 | 'chunked_prefill_frames': 32, 9 | # Keyframe compression 10 | 'visual_compression': True, 11 | 'visual_compression_kwargs': { 12 | 'compression_ratio': 1.0, 13 | 'compression_method': 'Keyframe', 14 | 'patch_sync': False, 15 | 'return_keyframe_mask': True 16 | }, 17 | # KVCache compression 18 | 'kvcache_compression': True, 19 | 'kvcache_compression_kwargs': { 20 | 'dynamic_compression_ratio': True, 21 | 'compression_method': 'pivotkv', 22 | 
'pos_embed_reforge': True, 23 | 'max_input_length': 32000 24 | }, 25 | } 26 | 27 | ### dataset 28 | dataset_name: mlvu 29 | anno_file: dataset/mlvu/mlvu.json 30 | dataloader_num_workers: 2 31 | 32 | ### data 33 | sample_fps: 4 34 | max_num_frames: 2048 35 | longsize_resolution: 448 36 | 37 | ### generate 38 | do_sample: false 39 | 40 | ### output 41 | output_dir: results/qwen2vl_7b_mlvu_f2048_4fps_r448/retake_dp1-async_pivot-32k 42 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/llava_video/retake_llava-video_lvbench.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: llava_video 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 32, 8 | 'chunked_prefill_frames': 32, 9 | # Keyframe compression 10 | 'visual_compression': True, 11 | 'visual_compression_kwargs': { 12 | 'compression_ratio': 1.0, 13 | 'compression_method': 'Keyframe', 14 | 'patch_sync': False, 15 | 'return_keyframe_mask': True 16 | }, 17 | # KVCache compression 18 | 'kvcache_compression': True, 19 | 'kvcache_compression_kwargs': { 20 | 'dynamic_compression_ratio': True, 21 | 'compression_method': 'pivotkv', 22 | 'pos_embed_reforge': True, 23 | 'max_input_length': 40000 24 | }, 25 | } 26 | 27 | ### dataset 28 | dataset_name: lvbench 29 | anno_file: dataset/lvbench/lvbench.json 30 | dataloader_num_workers: 4 31 | 32 | ### data 33 | sample_fps: 2 34 | max_num_frames: 1024 35 | longsize_resolution: 682 36 | 37 | ### generate 38 | do_sample: false 39 | 40 | ### output 41 | output_dir: results/llava-video_f1024_2fps_r682/retake_dp1-async_pivot-40k 42 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/llava_video/retake_llava-video_mlvu.yaml: -------------------------------------------------------------------------------- 1 | ### 
model 2 | model_name: llava_video 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 32, 8 | 'chunked_prefill_frames': 32, 9 | # Keyframe compression 10 | 'visual_compression': True, 11 | 'visual_compression_kwargs': { 12 | 'compression_ratio': 1.0, 13 | 'compression_method': 'Keyframe', 14 | 'patch_sync': False, 15 | 'return_keyframe_mask': True 16 | }, 17 | # KVCache compression 18 | 'kvcache_compression': True, 19 | 'kvcache_compression_kwargs': { 20 | 'dynamic_compression_ratio': True, 21 | 'compression_method': 'pivotkv', 22 | 'pos_embed_reforge': True, 23 | 'max_input_length': 40000 24 | }, 25 | } 26 | 27 | ### dataset 28 | dataset_name: mlvu 29 | anno_file: dataset/mlvu/mlvu.json 30 | dataloader_num_workers: 4 31 | 32 | ### data 33 | sample_fps: 2 34 | max_num_frames: 1024 35 | longsize_resolution: 682 36 | 37 | ### generate 38 | do_sample: false 39 | 40 | ### output 41 | output_dir: results/llava-video_rope4_mlvu_f1024_2fps_r682/retake_dp1-async_pivot-40k 42 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_vl/retake_qwen2-vl_lvbench.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 128, 8 | 'chunked_prefill_frames': 32, 9 | # Keyframe compression 10 | 'visual_compression': True, 11 | 'visual_compression_kwargs': { 12 | 'compression_ratio': 1.0, 13 | 'compression_method': 'Keyframe', 14 | 'patch_sync': False, 15 | 'return_keyframe_mask': True 16 | }, 17 | # KVCache compression 18 | 'kvcache_compression': True, 19 | 'kvcache_compression_kwargs': { 20 | 'dynamic_compression_ratio': True, 21 | 'compression_method': 'pivotkv', 22 | 'pos_embed_reforge': True, 23 | 'max_input_length': 32000 24 | }, 25 | 
} 26 | 27 | ### dataset 28 | dataset_name: lvbench 29 | anno_file: dataset/lvbench/lvbench.json 30 | dataloader_num_workers: 2 31 | 32 | ### data 33 | sample_fps: 2 34 | max_num_frames: 2048 35 | longsize_resolution: 448 36 | 37 | ### generate 38 | do_sample: false 39 | 40 | ### output 41 | output_dir: results/qwen2vl_7b_lvbench_f2048_2fps_r448/retake_dp1-async_pivot-32k 42 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_runner.cpp: -------------------------------------------------------------------------------- 1 | #include "retrieve_task_runner.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "logger/logger.h" 8 | #include "memory/memory.h" 9 | #include "template/singleton.h" 10 | #include "simd_compute_kernel.h" 11 | 12 | namespace KVStar { 13 | 14 | Status RetrieveTaskRunner::Run(const RetrieveTask& task, TaskResult& result) { 15 | try { 16 | KVSTAR_DEBUG("Task {} starting pure C++ computation.", task.allocTaskId); 17 | 18 | KVStar::Execute(task, result); 19 | 20 | KVSTAR_DEBUG("Task {} pure C++ computation finished successfully.", task.allocTaskId); 21 | 22 | 23 | } catch (const std::exception& e) { 24 | KVSTAR_ERROR("Task {} failed during computation in Runner. Error: {}", task.allocTaskId, e.what()); 25 | 26 | { 27 | std::lock_guard lock(result.mtx); 28 | result.errorMessage = e.what(); 29 | result.status.store(TaskStatus::FAILURE, std::memory_order_release); 30 | } 31 | 32 | 33 | } 34 | 35 | return Status::OK(); 36 | } 37 | 38 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /test/config.yaml: -------------------------------------------------------------------------------- 1 | reports: 2 | base_dir: "results/reports" 3 | use_timestamp: true 4 | directory_prefix: "pytest" 5 | html: # pytest-html 6 | enabled: false 7 | filename: "report.html" 8 | title: "UCM Pytest Test Report" 9 | 10 | database: 11 | backup: "results/" 12 | enabled: true 13 | host: "127.0.0.1" 14 | port: 5432 15 | name: "ucm_test" 16 | user: "postgres" 17 | password: "123456" 18 | 19 | models: 20 | ip_ports: "" 21 | tokenizer_path: "" 22 | served_model_name: "" 23 | payload: '' 24 | enable_clear_hbm: false 25 | 26 | # LLM Connection Configuration 27 | llm_connection: 28 | model: "" 29 | server_url: "" 30 | tokenizer_path: "" 31 | stream: true # stream output 32 | ignore_eos: true # Ignore the returned terminator 33 | timeout: 180 # request time out 34 | 35 | # Environment Pre-Check Configuration 36 | Env_preCheck: 37 | master_ip: 192.168.0.1 38 | worker_ip: 39 | ascend_rt_visible_devices: "" 40 | node_num: 41 | model_path: "" 42 | hf_model_name: "" 43 | middle_page: "" 44 | expected_embed_bandwidth: 10 45 | expected_fetch_bandwidth: 10 46 | kvCache_block_number: 1024 47 | storage_backends: [""] -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/llava_video/retake_llava-video_videomme.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: llava_video 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'frame_chunk_size': 32, 8 | 'chunked_prefill_frames': 32, 9 | # Keyframe compression 10 | 'visual_compression': True, 11 | 'visual_compression_kwargs': { 12 | 'compression_ratio': 1.0, 13 | 'compression_method': 'Keyframe', 14 | 'patch_sync': False, 15 | 'return_keyframe_mask': True 16 | }, 17 | # KVCache 
compression 18 | 'kvcache_compression': True, 19 | 'kvcache_compression_kwargs': { 20 | 'dynamic_compression_ratio': True, 21 | 'compression_method': 'pivotkv', 22 | 'pos_embed_reforge': True, 23 | 'max_input_length': 40000 24 | }, 25 | } 26 | 27 | ### dataset 28 | dataset_name: videomme 29 | anno_file: dataset/video_mme/video_mme.json 30 | dataloader_num_workers: 4 31 | 32 | ### data 33 | sample_fps: 2 34 | max_num_frames: 1024 35 | longsize_resolution: 682 36 | 37 | ### generate 38 | do_sample: false 39 | 40 | ### output 41 | output_dir: results/llava-video_rope4_video_mme_f1024_2fps_r682/retake_dp1-async_pivot-40k 42 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_longvideobench.yaml: -------------------------------------------------------------------------------- 1 | ### model 2 | model_name: qwen2_5_vl 3 | method: retake 4 | scaling_factor: 4 5 | attn_implementation: "flash_attention_2" 6 | longvideo_kwargs: { 7 | 'chunked_prefill_frames': 32, 8 | 'frame_chunk_size': 64, 9 | # KVCache compression 10 | 'kvcache_compression': True, 11 | 'kvcache_compression_kwargs': { 12 | 'compression_method': 'stdvidlkv', 13 | 'dynamic_compression_ratio': True, 14 | 'prompt_guided_compression': True, 15 | 'max_guide_length': 152, 16 | 'pos_embed_reforge': False, 17 | 'max_input_length': 16000, 18 | # Temporal 19 | 'enable_temporal_adaptation': True, 20 | 'temporal_adaptation_ratio': 4, 21 | # Layer 22 | 'budget_allocation_method': 'adakv', 23 | }, 24 | } 25 | 26 | 27 | ### dataset 28 | dataset_name: longvideobench 29 | anno_file: dataset/longvideobench/longvideobench_val.json 30 | dataloader_num_workers: 4 31 | 32 | ### data 33 | sample_fps: 2 34 | max_num_frames: 2048 35 | longsize_resolution: 448 36 | 37 | ### generate 38 | do_sample: false 39 | 40 | ### output 41 | output_dir: results/qwen25vl_7b_longvideobench_f2048_2fps_r448/adaretake-16k 42 | 
-------------------------------------------------------------------------------- /ucm/shared/trans/maca/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CUDA_ROOT "/opt/maca/tools/cu-bridge" CACHE PATH "Path to WCUDA root directory") 2 | set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/cucc) 3 | list(APPEND CMAKE_MODULE_PATH "${CUDA_ROOT}/cmake_module/maca") 4 | enable_language(CUDA) 5 | add_library(kernel OBJECT maca_sm_kernel.cu) 6 | target_compile_options(kernel PRIVATE 7 | -Wall -fPIC 8 | -std=c++17 9 | ) 10 | add_library(trans STATIC 11 | ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_device.cc 12 | ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_buffer.cc 13 | ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_stream.cc 14 | ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_sm_stream.cc 15 | ) 16 | 17 | add_library(WCUDA::cudart UNKNOWN IMPORTED) 18 | set_target_properties(WCUDA::cudart PROPERTIES 19 | INTERFACE_INCLUDE_DIRECTORIES "${CUDA_ROOT}/include" 20 | IMPORTED_LOCATION "${CUDA_ROOT}/lib/libcuda.so" 21 | ) 22 | target_include_directories(WCUDA::cudart INTERFACE 23 | /opt/maca/include 24 | /opt/maca/include/mcr 25 | ) 26 | 27 | target_include_directories(trans PUBLIC ${CUDA_ROOT}/include) 28 | target_link_directories(trans PUBLIC ${CUDA_ROOT}/lib64) 29 | target_link_libraries(trans PUBLIC 30 | fmt 31 | WCUDA::cudart 32 | kernel 33 | ) 34 | -------------------------------------------------------------------------------- /ucm/sparse/gsa/offload_ops/src/thread_safe_queue.cpp: -------------------------------------------------------------------------------- 1 | #include "thread_safe_queue.h" 2 | 3 | ThreadSafeQueue::ThreadSafeQueue() : m_stopped(false) {} 4 | 5 | void ThreadSafeQueue::push(CopyInfo value) { 6 | std::lock_guard lock(m_mutex); 7 | m_queue.push(std::move(value)); 8 | m_condVar.notify_one(); 9 | } 10 | 11 | CopyInfo ThreadSafeQueue::pop() { 12 | std::unique_lock lock(m_mutex); 13 | m_condVar.wait(lock, [this] { 14 | return 
!m_queue.empty() || m_stopped; 15 | }); 16 | CopyInfo value = std::move(m_queue.front()); 17 | m_queue.pop(); 18 | return value; 19 | } 20 | 21 | size_t ThreadSafeQueue::size() const { 22 | std::lock_guard lock(m_mutex); 23 | return m_queue.size(); 24 | } 25 | 26 | bool ThreadSafeQueue::empty() const { 27 | std::lock_guard lock(m_mutex); 28 | return m_queue.empty(); 29 | } 30 | 31 | void ThreadSafeQueue::stop() { 32 | std::lock_guard lock(m_mutex); 33 | m_stopped = true; 34 | m_condVar.notify_all(); 35 | } 36 | 37 | void ThreadSafeQueue::clear() { 38 | std::lock_guard lock(m_mutex); 39 | while (!m_queue.empty()) { 40 | m_queue.pop(); 41 | } 42 | } -------------------------------------------------------------------------------- /ucm/sparse/gsa/offload_ops/src/pybinds.cpp: -------------------------------------------------------------------------------- 1 | #pragma GCC diagnostic push 2 | #include 3 | #include 4 | #include 5 | #include 6 | #pragma GCC diagnostic pop 7 | #include "cal_kpre_and_topk.h" 8 | 9 | PYBIND11_MODULE(gsa_offload_ops, m) 10 | { 11 | pybind11::class_(m, "CalKpreAndTopk") 12 | .def(pybind11::init()) 13 | .def_readwrite("k_cache", &CalKpreAndTopk::m_kCache) 14 | .def_readwrite("q_cache", &CalKpreAndTopk::m_qCache) 15 | .def("set_kpre_method_param", &CalKpreAndTopk::SetKpreMethodParam) 16 | .def("set_kpre_cache", &CalKpreAndTopk::SetKpreCache) 17 | .def("set_topk_cache", &CalKpreAndTopk::SetTopkCache) 18 | .def("set_common_param", &CalKpreAndTopk::SetCommonParam) 19 | .def("set_topk_param", &CalKpreAndTopk::SetTopkParam) 20 | .def("set_kpre_param", &CalKpreAndTopk::SetKpreParam) 21 | .def("set_kpre_data_ready", &CalKpreAndTopk::SetKpreDataReady) 22 | .def("set_topk_data_ready", &CalKpreAndTopk::SetTopkDataReady) 23 | .def("add_copy_req", &CalKpreAndTopk::AddCopyReq) 24 | .def("is_calculate_finish", &CalKpreAndTopk::IsCalculateFinish); 25 | } 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- 
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_MANAGER_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_MANAGER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "retrieve_task_queue.h" 8 | #include "task_result.h" 9 | 10 | namespace KVStar { 11 | class RetrieveTaskManager { 12 | public: 13 | Status Setup(const size_t threadNum, const std::vector>& bindInfo); 14 | Status SubmitSingleTask(RetrieveTask&&task, size_t &taskId); 15 | 16 | Status GetResult(size_t taskId, std::shared_ptr& result); 17 | 18 | Status Wait(const size_t taskId); 19 | private: 20 | void Dispatch(); 21 | 22 | private: 23 | std::mutex _mutex; 24 | RetrieveTaskSet _failureSet; 25 | std::unordered_map> _waiters; 26 | 27 | std::unordered_map> _resultMap; 28 | 29 | std::vector> _queues; 30 | size_t _lastTimeScheduledQueueIdx{0}; 31 | size_t _taskIdSeed{0}; 32 | 33 | }; 34 | 35 | } 36 | 37 | 38 | 39 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_MANAGER_H 40 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_queue.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_QUEUE_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_QUEUE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "status/status.h" 10 | 11 | #include "retrieve_task.h" 12 | #include "retrieve_task_set.h" 13 | #include "task_result.h" 14 | 15 | namespace KVStar { 16 | struct WorkItem { 17 | RetrieveTask task; 18 | std::shared_ptr result; 19 | }; 20 | 21 | class RetrieveTaskQueue { 22 | public: 23 | ~RetrieveTaskQueue(); 24 | Status Setup(const int numaId, const int bindCoreId, RetrieveTaskSet* failureSet); // failureSet from manager, for all 
queue 25 | void Push(WorkItem&& item); 26 | 27 | private: 28 | void Worker(const int numaId, const int bindCoreId, std::promise& started); 29 | 30 | private: 31 | std::list _taskQ; 32 | std::mutex _mutex; 33 | std::condition_variable _cv; 34 | std::thread _worker; 35 | bool _running{false}; 36 | RetrieveTaskSet* _failureSet; 37 | 38 | 39 | }; 40 | } 41 | 42 | 43 | 44 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_QUEUE_H -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # Purpose 8 | 9 | What this PR does / why we need it? 10 | 18 | 19 | # Modifications 20 | 21 | Does this PR introduce _any_ user-facing change? 22 | 26 | 27 | # Test 28 | 29 | How was this patch tested? 30 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/scripts/submission/prepare_lvbench_submission.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pandas as pd 5 | 6 | 7 | def load_jsonl(file): 8 | return [json.loads(line) for line in open(file, "r").readlines()] 9 | 10 | 11 | predict_result_dir = "./results/path_to_results" 12 | output_file = "./ReTaKe_LVBench_submission.json" 13 | 14 | LVBENCH_ANNO_FILE = "./dataset/lvbench/lvbench.json" 15 | 16 | 17 | ################ DO NOT CHANGE ################ 18 | def create_submission_file(predict_result_dir, output_file): 19 | results_df = pd.read_csv(os.path.join(predict_result_dir, "eval_results.csv")) 20 | 21 | video_id2results = {} 22 | res = results_df.loc[0] 23 | video_id2results["KIR"] = res["key information retrieval"] / 100 24 | video_id2results["EU"] = res["event understanding"] / 100 25 | video_id2results["Sum"] = res["summarization"] / 100 26 | video_id2results["ER"] = res["entity recognition"] / 100 27 | video_id2results["Rea"] = 
res["reasoning"] / 100 28 | video_id2results["TG"] = res["temporal grounding"] / 100 29 | video_id2results["Overall"] = res["overall"] / 100 30 | 31 | with open(output_file, "w") as f: 32 | json.dump(video_id2results, f, indent=2) 33 | 34 | 35 | create_submission_file(predict_result_dir, output_file) 36 | -------------------------------------------------------------------------------- /ucm/shared/infra/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE UCMINFRA_STATUS_SOURCE_FILES "status/*.*") 2 | add_library(infra_status OBJECT ${UCMINFRA_STATUS_SOURCE_FILES}) 3 | target_include_directories(infra_status PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 4 | target_link_libraries(infra_status PUBLIC fmt) 5 | 6 | file(GLOB UCMINFRA_LOGGER_SOURCE_FILES "logger/*.*") 7 | file(GLOB_RECURSE UCMINFRA_LOGGER_DETAIL_SOURCE_FILES "logger/${LOGGER_BACKEND}/*.cc") 8 | add_library(infra_logger OBJECT ${UCMINFRA_LOGGER_SOURCE_FILES} ${UCMINFRA_LOGGER_DETAIL_SOURCE_FILES}) 9 | target_include_directories(infra_logger PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 10 | target_link_libraries(infra_logger PUBLIC fmt spdlog) 11 | 12 | file(GLOB_RECURSE UCMINFRA_TEMPLATE_SOURCE_FILES "template/*.*") 13 | add_library(infra_template OBJECT ${UCMINFRA_TEMPLATE_SOURCE_FILES}) 14 | target_include_directories(infra_template PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 15 | 16 | file(GLOB_RECURSE UCMINFRA_THREAD_SOURCE_FILES "thread/*.*") 17 | add_library(infra_thread OBJECT ${UCMINFRA_THREAD_SOURCE_FILES}) 18 | target_include_directories(infra_thread PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 19 | 20 | file(GLOB_RECURSE UCMINFRA_TIME_SOURCE_FILES "time/*.*") 21 | add_library(infra_time OBJECT ${UCMINFRA_TIME_SOURCE_FILES}) 22 | target_include_directories(infra_time PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 23 | -------------------------------------------------------------------------------- /.github/workflows/e2e_test.yml: 
-------------------------------------------------------------------------------- 1 | name: offline_inference_test 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | offline-inference: 7 | runs-on: arc-runner-ucm 8 | steps: 9 | - uses: actions/checkout@v4 10 | - run: nvidia-smi 11 | - name: Run offline_inference in container 12 | run: | 13 | docker run --rm \ 14 | --gpus all \ 15 | -v ${{ github.workspace }}:/workspace/unified-cache-management \ 16 | -v /home_116/models/Qwen2.5-1.5B-Instruct:/home/models/Qwen2.5-1.5B-Instruct \ 17 | -w /workspace/unified-cache-management \ 18 | --entrypoint /bin/bash \ 19 | vllm/vllm-openai:v0.9.2 \ 20 | -c " 21 | set -euo pipefail 22 | export PLATFORM=cuda 23 | export MODEL_PATH=/home/models/Qwen2.5-1.5B-Instruct 24 | pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple 25 | pip install -v -e . --no-build-isolation 26 | cd \$(pip show vllm | grep Location | awk '{print \$2}') && 27 | git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt.patch 28 | cd /workspace/unified-cache-management 29 | python3 examples/offline_inference.py 30 | " -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/110-user-story.yml: -------------------------------------------------------------------------------- 1 | name: 📚 User Story 2 | description: Apply for an user story to be displayed on ucm official website 3 | title: "[User Story]: " 4 | labels: ["user-story"] 5 | 6 | body: 7 | - type: textarea 8 | attributes: 9 | label: 📚 Title 10 | description: > 11 | A clear title about what your user story is about. 12 | validations: 13 | required: true 14 | - type: textarea 15 | attributes: 16 | label: About / Introduction 17 | description: > 18 | A brief introduction about the background of your use case, like your scenario, hardware size etc. 
19 | - type: textarea 20 | attributes: 21 | label: Business Challenges 22 | description: > 23 | Tell us what kind of challenges you faced in this user story. 24 | - type: textarea 25 | attributes: 26 | label: Solving challenges with ucm and benefits 27 | description: > 28 | Tell us how unifiedcache helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc. And what kind of benefit do you get from using unifiedcache 29 | - type: textarea 30 | attributes: 31 | label: Extra Info 32 | description: > 33 | Any extra information you want to include in this story 34 | - type: markdown 35 | attributes: 36 | value: > 37 | Thanks for contributing 🎉! 38 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/api/kvstar_retrieve/kvstar_retrieve.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "kvstar_retrieve.h" 4 | #include "status/status.h" 5 | #include "logger/logger.h" 6 | #include "template/singleton.h" 7 | #include "retrieve_task/retrieve_task_manager.h" 8 | 9 | namespace KVStar { 10 | SetupParam::SetupParam(const std::vector& cpuNumaIds, const std::vector>& bindInfo, const DeviceType deviceType, const int totalTpSize, const int localRankId) 11 | : cpuNumaIds{cpuNumaIds}, bindInfo{bindInfo}, deviceType{deviceType}, 12 | totalTpSize{totalTpSize}, localRankId{localRankId} 13 | { 14 | this->threadNum = this->bindInfo.size(); 15 | KVSTAR_DEBUG("Successfully configured. 
Total threads = {}.", this->threadNum); 16 | } 17 | 18 | 19 | int32_t Setup(const SetupParam& param) 20 | { 21 | 22 | auto status = Singleton::Instance()->Setup(param.threadNum, param.bindInfo); 23 | if (status.Failure()) { 24 | KVSTAR_ERROR("Failed({}) to setup RetrieveTaskManager.", status); 25 | return status.Underlying(); 26 | } 27 | KVSTAR_DEBUG("Setup RetrieveTaskManager success."); 28 | 29 | return Status::OK().Underlying(); 30 | } 31 | 32 | int32_t Wait(const size_t taskId) { 33 | return Singleton::Instance()->Wait(taskId).Underlying(); 34 | } 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/500-feature-request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: Submit a proposal/request for a new ucm feature 3 | title: "[Feature]: " 4 | labels: ["feature request"] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: > 10 | #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc). 11 | - type: textarea 12 | attributes: 13 | label: 🚀 The feature, motivation and pitch 14 | description: > 15 | A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too. 16 | validations: 17 | required: true 18 | - type: textarea 19 | attributes: 20 | label: Alternatives 21 | description: > 22 | A description of any alternative solutions or features you've considered, if any. 
23 | - type: textarea 24 | attributes: 25 | label: Additional context 26 | description: > 27 | Add any other context or screenshots about the feature request. 28 | - type: markdown 29 | attributes: 30 | value: > 31 | Thanks for contributing 🎉! 32 | -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_set.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_SET_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_SET_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace KVStar { 10 | class RetrieveTaskSet { 11 | static constexpr size_t nBucket = 8192; 12 | public: 13 | void Insert(const size_t id) 14 | { 15 | auto idx = this->Hash(id); 16 | std::unique_lock lk(this->_mutexes[idx]); 17 | this->_buckets[idx].push_back(id); 18 | } 19 | bool Exist(const size_t id) 20 | { 21 | auto idx = this->Hash(id); 22 | std::shared_lock lk(this->_mutexes[idx]); 23 | auto bucket = this->_buckets + idx; 24 | return std::find(bucket->begin(), bucket->end(), id) != bucket->end(); 25 | } 26 | void Remove(const size_t id) 27 | { 28 | auto idx = this->Hash(id); 29 | std::unique_lock lk(this->_mutexes[idx]); 30 | this->_buckets[idx].remove(id); 31 | } 32 | 33 | private: 34 | size_t Hash(const size_t id) { return id % nBucket; } 35 | 36 | private: 37 | std::shared_mutex _mutexes[nBucket]; 38 | std::list _buckets[nBucket]; 39 | 40 | }; 41 | 42 | } 43 | 44 | 45 | 46 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_SET_H -------------------------------------------------------------------------------- /ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task.h: -------------------------------------------------------------------------------- 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_H 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_H 3 | 4 | #include 5 | #include 6 | 
#include 7 | #include 8 | #include "retrieve_task_waiter.h" 9 | #include "computation_task.h" 10 | 11 | namespace KVStar { 12 | 13 | enum DeviceType { 14 | CPU = 0, 15 | NPU, 16 | GPU, 17 | TYPE_END 18 | }; 19 | 20 | struct RetrieveTask { 21 | PlainTensor queryGroup; 22 | PlainTensor blkRepre; 23 | std::optional dPrunedIndex; 24 | 25 | int topK; 26 | int reqId; 27 | DeviceType deviceType; 28 | size_t allocTaskId; 29 | std::shared_ptr waiter; 30 | 31 | RetrieveTask( 32 | PlainTensor qGroup, PlainTensor bRepre, std::optional pIndex, 33 | int tK, int rId, DeviceType devType 34 | ) : queryGroup(std::move(qGroup)), 35 | blkRepre(std::move(bRepre)), 36 | dPrunedIndex(std::move(pIndex)), 37 | topK(tK), 38 | reqId(rId), 39 | deviceType(devType), 40 | allocTaskId(0) {} 41 | 42 | RetrieveTask() = default; 43 | RetrieveTask(RetrieveTask&& other) noexcept = default; 44 | RetrieveTask& operator=(RetrieveTask&& other) noexcept = default; 45 | }; 46 | 47 | } 48 | 49 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_H -------------------------------------------------------------------------------- /examples/ucm_config_example.yaml: -------------------------------------------------------------------------------- 1 | # UCM Configuration File Example 2 | # 3 | # This file demonstrates how to configure UCM using YAML. 4 | # You can use this config file by setting the path to this file in kv_connector_extra_config in launch script or command line like this: 5 | # kv_connector_extra_config={"UCM_CONFIG_FILE": "/workspace/unified-cache-management/examples/ucm_config_example.yaml"} 6 | # 7 | # Alternatively, you can still use kv_connector_extra_config in KVTransferConfig 8 | # for backward compatibility. 
9 | 10 | # Connector name (e.g., "UcmNfsStore", "UcmDramStore") 11 | ucm_connectors: 12 | - ucm_connector_name: "UcmNfsStore" 13 | ucm_connector_config: 14 | storage_backends: "/mnt/test" 15 | use_direct: false 16 | 17 | load_only_first_rank: false 18 | 19 | # Enable UCM metrics so they can be monitored online via Grafana and Prometheus. 20 | # metrics_config_path: "/workspace/unified-cache-management/examples/metrics/metrics_configs.yaml" 21 | 22 | # Sparse attention configuration 23 | # Format 1: Dictionary format (for methods like ESA, KvComp) 24 | # ucm_sparse_config: 25 | # ESA: 26 | # init_window_sz: 1 27 | # local_window_sz: 2 28 | # min_blocks: 4 29 | # sparse_ratio: 0.3 30 | # retrieval_stride: 5 31 | # Or for GSA: 32 | # GSA: {} 33 | 34 | 35 | # Whether to use layerwise loading/saving (optional, default: True for UCMConnector) 36 | # use_layerwise: true 37 | # hit_ratio: 0.9 38 | 39 | -------------------------------------------------------------------------------- /ucm/store/detail/task/task_set.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #ifndef UNIFIEDCACHE_TASK_SET_H 25 | #define UNIFIEDCACHE_TASK_SET_H 26 | 27 | #include "template/hashset.h" 28 | 29 | namespace UC { 30 | 31 | class TaskSet : public HashSet {}; 32 | 33 | } // namespace UC 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/600-new-model.yml: -------------------------------------------------------------------------------- 1 | name: 🤗 Support request for new model supported from huggingface/modelscope/modelers on ucm 2 | description: Submit a proposal/request for a new model from huggingface/modelscope/modelers on ucm 3 | title: "[New Model]: " 4 | labels: ["new model"] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: > 10 | #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc). 11 | 12 | #### We also highly recommend you read our official website first to know which model already supported. 13 | - type: textarea 14 | attributes: 15 | label: The model to consider. 16 | description: > 17 | A huggingface/modelscope/modelers url, pointing to the model, e.g. https://huggingface.co/openai-community/gpt2 . 18 | validations: 19 | required: true 20 | - type: textarea 21 | attributes: 22 | label: The closest model ucm already supports. 
23 | description: > 24 | Here is the list of models already supported by unifiedcache in our official website . Which model is the most similar to the model you want to add support for? 25 | - type: textarea 26 | attributes: 27 | label: What's your difficulty of supporting the model you want? 28 | description: > 29 | For example, any new operators or new architecture? 30 | - type: markdown 31 | attributes: 32 | value: > 33 | Thanks for contributing 🎉! 34 | -------------------------------------------------------------------------------- /test/common/doc/LLMPerf.md: -------------------------------------------------------------------------------- 1 | # 📝 LLM 性能测试使用说明 2 | 3 | ## 🔧 功能概述 4 | 本测试用于评估 LLM 推理服务在不同负载下的性能表现,涵盖延迟、吞吐量、请求成功率等关键指标。 5 | 6 | ## 📌 测试参数说明 7 | 8 | | 参数 | 说明 | 示例 | 9 | |------|------|------| 10 | | `mean_input_tokens` | 平均输入 token 数 | `[2000, 3000]` | 11 | | `mean_output_tokens` | 平均输出 token 数 | `[200, 500]` | 12 | | `max_num_completed_requests` | 最大完成请求数 | `[8, 4]` | 13 | | `concurrent_requests` | 并发请求数 | `[8, 4]` | 14 | | `additional_sampling_params` | 额外采样参数(如 temperature) | `["{}", "{}"]` | 15 | | `hit_rate` | 缓存命中率 | `[0, 50]` | 16 | 17 | > ✅ 支持多组参数组合运行,自动执行多轮推理并收集统计结果。 18 | 19 | ## 📊 输出结果 20 | 21 | 测试完成后,将输出以下性能指标的统计值(每轮结果均记录): 22 | 23 | - **延迟指标**: 24 | - `inter_token_latency_s`(token 间延迟) 25 | - `ttft_s`(首个 token 延迟) 26 | - `end_to_end_latency_s`(端到端延迟) 27 | - 各项包含:P50、P90、P99、平均值 28 | 29 | - **吞吐量指标**: 30 | - `total_throughput`(总吞吐量) 31 | - `incremental_throughput`(增量吞吐量) 32 | 33 | - **其他指标**: 34 | - `num_completed_requests`(完成请求数) 35 | - `elapsed_time`(总耗时) 36 | - `incremental_time_delay`(增量时间延迟) 37 | 38 | ## ✅ 验证规则 39 | 40 | - 所有数值必须 > 0 41 | - 若出现 `None` 或 ≤ 0 的值,测试将标记为失败,并输出异常详情 42 | 43 | ## 📤 输出格式 44 | 45 | 返回一个字典,包含: 46 | ```python 47 | { 48 | "_name": "llmperf", 49 | "_data": { # 所有指标的列表 50 | "results_inter_token_latency_s_quantiles_p50": [...], 51 | "results_ttft_s_mean": [...], 52 | # ... 
53 | } 54 | } 55 | ``` 56 | 57 | ## 🚀 使用方式 test/下运行 58 | 59 | # 按文件运行 60 | pytest test_uc_performance.py 61 | 62 | # 按阶段运行 63 | pytest --stage=0 64 | 65 | # 按特性运行 66 | pytest --feature=uc_performance_test 67 | 68 | > ⚠️ 确保已安装依赖:`pytest` 等模块。 -------------------------------------------------------------------------------- /ucm/store/detail/task/task_queue.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | * */ 24 | #ifndef UNIFIEDCACHE_TASK_QUEUE_H 25 | #define UNIFIEDCACHE_TASK_QUEUE_H 26 | 27 | #include "task_shard.h" 28 | 29 | namespace UC { 30 | 31 | class TaskQueue { 32 | public: 33 | virtual ~TaskQueue() = default; 34 | virtual void Push(std::list& shards) noexcept = 0; 35 | }; 36 | 37 | } // namespace UC 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/docs/prepare_videomme.md: -------------------------------------------------------------------------------- 1 | ## Prepare VideoMME Dataset 2 | 3 | 4 | ### Step 1: Download VideoMME dataset from [huggingface](https://huggingface.co/datasets/lmms-lab/Video-MME) 5 | ```bash 6 | git clone https://huggingface.co/datasets/lmms-lab/Video-MME 7 | ``` 8 | 9 | Denote the root directory of the downloaded VideoMME dataset as `videomme_root`; it should have the following structure: 10 | ``` 11 | ${videomme_root}/ 12 | ├── videomme/ 13 | ├── subtitle.zip 14 | ├── videos_chunked_01.zip 15 | ├── videos_chunked_02.zip 16 | ├── ... 
17 | └── videos_chunked_20.zip 18 | ``` 19 | 20 | 21 | ### Step 2: Unzip everything 22 | ```bash 23 | cd ${videomme_root} 24 | unzip subtitle.zip 25 | for file in videos_chunked_*.zip; do 26 | unzip "$file" 27 | done 28 | ``` 29 | 30 | 31 | ### Step 3: Extract frames of all videos 32 | ```bash 33 | cd ${retake_root} 34 | python scripts/utils/frame_extraction.py \ 35 | --videofile_tpl ${videomme_root}/data/'*.mp4' \ 36 | --results_dir ${videomme_root}/video_25fps \ 37 | --fps 25 \ 38 | --num_workers 32 39 | ``` 40 | 41 | 42 | ### Step 4: Build VideoMME dataset 43 | ```bash 44 | cd ${retake_root} 45 | python scripts/utils/build_videomme_dataset.py \ 46 | --hf_qwen2vl7b_path ${PATH_TO_Qwen2_VL_7B_Instruct} \ 47 | --hf_root ${videomme_root} 48 | ``` 49 | Note that you can NOT modify folder `${videomme_root}/video_25fps` after this step, since the absolute paths of extracted frames are written into the annotation files `video_mme.json` and `video_mme_subtitle.json`: 50 | ``` 51 | retake_root/ 52 | ├── dataset/ 53 | ├── video_mme/ 54 | ├── video_mme_subtitle.json 55 | ├── video_mme.json 56 | ├── ... 57 | ``` -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/docs/prepare_lvbench.md: -------------------------------------------------------------------------------- 1 | ## Prepare LVBench Dataset 2 | 3 | 4 | ### Step 1: Download LVBench data from [huggingface](https://huggingface.co/datasets/THUDM/LVBench/tree/main) 5 | ```bash 6 | git clone https://huggingface.co/datasets/THUDM/LVBench # Contain annotations only 7 | git clone https://huggingface.co/datasets/AIWinter/LVBench # Contain videos only 8 | ``` 9 | Move all_files in `AIWinter/LVBench` into `THUDM/LVBench`. 
10 | 11 | Denote the root directory of the downloaded LVBench dataset as `lvbench_root`; it should have the following structure: 12 | ``` 13 | ${lvbench_root}/ 14 | ├── docs/ 15 | ├── video_info.meta.jsonl 16 | ├── all_videos_split.zip.001 17 | ├── all_videos_split.zip.002 18 | ├── ... 19 | └── all_videos_split.zip.014 20 | ``` 21 | 22 | 23 | ### Step 2: Unzip everything 24 | ```bash 25 | cd ${lvbench_root} 26 | cat all_videos_split.zip.* > all_videos.zip 27 | unzip all_videos.zip 28 | ``` 29 | 30 | 31 | ### Step 3: Extract frames of all videos 32 | ```bash 33 | cd ${retake_root} 34 | python scripts/utils/frame_extraction.py \ 35 | --videofile_tpl ${lvbench_root}/all_videos/'*.mp4' \ 36 | --results_dir ${lvbench_root}/video_25fps \ 37 | --fps 25 \ 38 | --num_workers 32 39 | ``` 40 | 41 | 42 | ### Step 4: Build LVBench dataset 43 | ```bash 44 | cd ${retake_root} 45 | python scripts/utils/build_lvbench_dataset.py --hf_root ${lvbench_root} 46 | ``` 47 | Note that you must NOT modify the folder `${lvbench_root}/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation file `lvbench.json`: 48 | ``` 49 | retake_root/ 50 | ├── dataset/ 51 | ├── lvbench/ 52 | ├── lvbench.json 53 | ├── ... 
54 | ``` -------------------------------------------------------------------------------- /ucm/sparse/kvcomp/configs/kvcomp_deepseek_v2_lite_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_name": "DeepSeek/DeepSeek-V2-Lite-Chat", 3 | "is_mla": true, 4 | "hash_weight_type": "random", 5 | "num_hidden_layers": 27, 6 | "seq_len_threshhold": 2048, 7 | "chunk_size": 128, 8 | "chunk_repre_method": "max", 9 | "head_dim": 576, 10 | "hash_bits": 128, 11 | "top_k_ratio_per_layer": [ 12 | 1, 13 | 1, 14 | 0.3, 15 | 0.3, 16 | 0.3, 17 | 0.3, 18 | 0.3, 19 | 0.3, 20 | 0.3, 21 | 0.3, 22 | 0.3, 23 | 0.3, 24 | 0.3, 25 | 0.3, 26 | 0.3, 27 | 0.3, 28 | 0.3, 29 | 0.3, 30 | 0.3, 31 | 0.3, 32 | 0.3, 33 | 0.3, 34 | 0.3, 35 | 0.3, 36 | 1, 37 | 1, 38 | 1 39 | ], 40 | "top_k_index_reuse": [ 41 | -1, 42 | -1, 43 | -1, 44 | -1, 45 | -1, 46 | -1, 47 | -1, 48 | -1, 49 | -1, 50 | -1, 51 | -1, 52 | -1, 53 | -1, 54 | -1, 55 | -1, 56 | -1, 57 | -1, 58 | -1, 59 | -1, 60 | -1, 61 | -1, 62 | -1, 63 | -1, 64 | -1, 65 | -1, 66 | -1, 67 | -1 68 | ], 69 | "must_select_blocks": [ 70 | 0, 71 | -2, 72 | -1 73 | ], 74 | "hash_weight": null, 75 | "kv_lora_rank": 512, 76 | "qk_rope_head_dim": 64, 77 | "hash_bits_kv_lora": 512, 78 | "hash_bits_qk_rope": 64, 79 | "hash_weight_kv_lora": null, 80 | "hash_weight_qk_rope": null 81 | } -------------------------------------------------------------------------------- /test/suites/E2E/test_evaluator.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | 3 | import pytest 4 | from common.capture_utils import export_vars 5 | from common.config_utils import config_utils as config_instance 6 | from common.uc_eval.task import DocQaEvalTask 7 | from common.uc_eval.utils.data_class import EvalConfig, ModelConfig 8 | 9 | 10 | @pytest.fixture(scope="session") 11 | def model_config() -> ModelConfig: 12 | cfg = config_instance.get_config("models") or {} 13 | 
field_name = [field.name for field in dataclasses.fields(ModelConfig)] 14 | kwargs = {k: v for k, v in cfg.items() if k in field_name and v is not None} 15 | return ModelConfig(**kwargs) 16 | 17 | 18 | doc_qa_eval_cases = [ 19 | pytest.param( 20 | EvalConfig( 21 | data_type="doc_qa", 22 | dataset_file_path="common/uc_eval/datasets/doc_qa/demo.jsonl", 23 | enable_prefix_cache=False, 24 | parallel_num=1, 25 | benchmark_mode="evaluate", 26 | metrics=["accuracy", "bootstrap-accuracy", "f1-score"], 27 | eval_class="common.uc_eval.utils.metric:Includes", 28 | ), 29 | id="doc-qa-complete-recalculate-evaluate", 30 | ) 31 | ] 32 | 33 | 34 | @pytest.mark.feature("eval_test") 35 | @pytest.mark.stage(2) 36 | @pytest.mark.parametrize("eval_config", doc_qa_eval_cases) 37 | @export_vars 38 | def test_doc_qa_perf( 39 | eval_config: EvalConfig, model_config: ModelConfig, request: pytest.FixtureRequest 40 | ): 41 | file_save_path = config_instance.get_config("reports").get("base_dir") 42 | task = DocQaEvalTask(model_config, eval_config, file_save_path) 43 | result = task.run() 44 | return {"_name": request.node.callspec.id, "_data": result} 45 | -------------------------------------------------------------------------------- /ucm/shared/trans/device.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #ifndef UNIFIEDCACHE_TRANS_DEVICE_H 25 | #define UNIFIEDCACHE_TRANS_DEVICE_H 26 | 27 | #include "buffer.h" 28 | #include "stream.h" 29 | 30 | namespace UC::Trans { 31 | 32 | class Device { 33 | public: 34 | Status Setup(int32_t deviceId); 35 | std::unique_ptr MakeStream(); 36 | std::unique_ptr MakeSMStream(); 37 | std::unique_ptr MakeBuffer(); 38 | }; 39 | 40 | } // namespace UC::Trans 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /ucm/shared/infra/time/now_time.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #ifndef UNIFIEDCACHE_SHARED_INFRA_TIME_NOW_TIME_H 25 | #define UNIFIEDCACHE_SHARED_INFRA_TIME_NOW_TIME_H 26 | 27 | #include 28 | 29 | namespace UC { 30 | 31 | class NowTime { 32 | public: 33 | static auto Now() 34 | { 35 | auto now = std::chrono::steady_clock::now().time_since_epoch(); 36 | return std::chrono::duration(now).count(); 37 | } 38 | }; 39 | 40 | } // namespace UC 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /ucm/shared/trans/cuda/cuda_buffer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #ifndef UNIFIEDCACHE_TRANS_CUDA_BUFFER_H 25 | #define UNIFIEDCACHE_TRANS_CUDA_BUFFER_H 26 | 27 | #include "trans/detail/reserved_buffer.h" 28 | 29 | namespace UC::Trans { 30 | 31 | class CudaBuffer : public ReservedBuffer { 32 | public: 33 | std::shared_ptr MakeDeviceBuffer(size_t size) override; 34 | std::shared_ptr MakeHostBuffer(size_t size) override; 35 | }; 36 | 37 | } // namespace UC::Trans 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /ucm/shared/trans/simu/simu_buffer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #ifndef UNIFIEDCACHE_TRANS_SIMU_BUFFER_H 25 | #define UNIFIEDCACHE_TRANS_SIMU_BUFFER_H 26 | 27 | #include "trans/detail/reserved_buffer.h" 28 | 29 | namespace UC::Trans { 30 | 31 | class SimuBuffer : public ReservedBuffer { 32 | public: 33 | std::shared_ptr MakeDeviceBuffer(size_t size) override; 34 | std::shared_ptr MakeHostBuffer(size_t size) override; 35 | }; 36 | 37 | } // namespace UC::Trans 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /ucm/shared/trans/ascend/ascend_buffer.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | * */ 24 | #ifndef UNIFIEDCACHE_TRANS_ASCEND_BUFFER_H 25 | #define UNIFIEDCACHE_TRANS_ASCEND_BUFFER_H 26 | 27 | #include "trans/detail/reserved_buffer.h" 28 | 29 | namespace UC::Trans { 30 | 31 | class AscendBuffer : public ReservedBuffer { 32 | public: 33 | std::shared_ptr MakeDeviceBuffer(size_t size) override; 34 | std::shared_ptr MakeHostBuffer(size_t size) override; 35 | }; 36 | 37 | } // namespace UC::Trans 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(unified-cache-management VERSION 1.0.0 LANGUAGES CXX) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 8 | 9 | option(BUILD_UCM_STORE "build ucm store module." ON) 10 | option(BUILD_UCM_SPARSE "build ucm sparse module." OFF) 11 | option(BUILD_UNIT_TESTS "build all unit test suits." OFF) 12 | option(BUILD_NUMA "build numactl library." OFF) 13 | option(DOWNLOAD_DEPENDENCE "download dependence by cmake." 
ON) 14 | set(RUNTIME_ENVIRONMENT "simu" CACHE STRING "runtime: simu, ascend, musa or cuda.") 15 | set(LOGGER_BACKEND "spdlog" CACHE STRING "backend: spdlog or flux.") 16 | 17 | execute_process(COMMAND git rev-parse HEAD OUTPUT_VARIABLE UCM_COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE) 18 | add_compile_definitions(UCM_PROJECT_NAME="${PROJECT_NAME}") 19 | add_compile_definitions(UCM_PROJECT_VERSION="${PROJECT_VERSION}") 20 | add_compile_definitions(UCM_COMMIT_ID="${UCM_COMMIT_ID}") 21 | add_compile_definitions(UCM_BUILD_TYPE="${CMAKE_BUILD_TYPE}") 22 | 23 | set(CMAKE_SKIP_RPATH TRUE) 24 | set(FLAGS_PUBLIC "-Wall -Werror -fPIC -Wl,-z,relro,-z,now") 25 | set(FLAGS_DEBUG "-O0 -g") 26 | set(FLAGS_RELEASE "-O3 -D_FORTIFY_SOURCE=2") 27 | string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER) 28 | if(CMAKE_BUILD_TYPE_LOWER STREQUAL "debug") 29 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_PUBLIC} ${FLAGS_DEBUG}") 30 | else() 31 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_PUBLIC} ${FLAGS_RELEASE}") 32 | endif() 33 | if(BUILD_UNIT_TESTS) 34 | enable_testing() 35 | endif() 36 | 37 | add_subdirectory(ucm) 38 | if(BUILD_UNIT_TESTS) 39 | add_subdirectory(test) 40 | endif() 41 | -------------------------------------------------------------------------------- /ucm/shared/infra/template/singleton.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #ifndef UNIFIEDCACHE_INFRA_SINGLETON_H 25 | #define UNIFIEDCACHE_INFRA_SINGLETON_H 26 | 27 | namespace UC { 28 | 29 | template 30 | class Singleton { 31 | public: 32 | Singleton(const Singleton&) = delete; 33 | Singleton& operator=(const Singleton&) = delete; 34 | static T* Instance() 35 | { 36 | static T t; 37 | return &t; 38 | } 39 | 40 | private: 41 | Singleton() = default; 42 | }; 43 | 44 | } // namespace UC 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/cc/domain/hotness/hotness_set.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | * */ 24 | 25 | #ifndef UNIFIEDCACHE_HOTNESS_SET_H 26 | #define UNIFIEDCACHE_HOTNESS_SET_H 27 | 28 | #include 29 | #include 30 | #include "space/space_layout.h" 31 | 32 | namespace UC { 33 | 34 | class HotnessSet { 35 | public: 36 | void Insert(const std::string& blockId); 37 | void UpdateHotness(const SpaceLayout* spaceLayout); 38 | 39 | private: 40 | std::mutex mutex_; 41 | std::unordered_set pendingBlocks_; 42 | }; 43 | 44 | 45 | } // namespace UC 46 | 47 | #endif -------------------------------------------------------------------------------- /ucm/sparse/gsa/prefetch/src/pybinds.cpp: -------------------------------------------------------------------------------- 1 | #pragma GCC diagnostic push 2 | #include 3 | #include 4 | #include 5 | #include 6 | #pragma GCC diagnostic pop 7 | #include "kvcache_pre.h" 8 | 9 | namespace ucmprefetch { 10 | PYBIND11_MODULE(gsa_prefetch, m) 11 | { 12 | pybind11::class_(m, "GSAPrefetchEngineC") 13 | .def(pybind11::init&, bool, bool, int, int, int, bool>()) 15 | .def("set_blocks_map", &ucmprefetch::GSAPrefetchEngineC::SetBlocksMap) 16 | .def("set_blocks_map_multilayer", &ucmprefetch::GSAPrefetchEngineC::SetBlocksMapMultiLayer) 17 | .def("add_blocks_map", &ucmprefetch::GSAPrefetchEngineC::AddBlocksMap) 18 | .def("del_blocks_map", &ucmprefetch::GSAPrefetchEngineC::DelBlocksMap) 19 | .def("run_async_prefetch_bs", &ucmprefetch::GSAPrefetchEngineC::RunAsyncPrefetchBs) 20 | .def("set_blocks_table_info", &ucmprefetch::GSAPrefetchEngineC::SetBlockTableInfo) 21 | .def("get_prefetch_status", &ucmprefetch::GSAPrefetchEngineC::GetPrefetchStatus) 22 | .def("set_prefetch_status", &ucmprefetch::GSAPrefetchEngineC::SetPrefetchStatus) 23 | .def("set_modelrunning_status", &ucmprefetch::GSAPrefetchEngineC::SetModelRunningStatus) 24 | .def("obtain_load_blocks", &ucmprefetch::GSAPrefetchEngineC::ObtainLoadBlocks) 25 | .def("obtain_miss_idxs", &ucmprefetch::GSAPrefetchEngineC::ObtainMissIdxs) 26 | .def("obtain_docs_map", 
&ucmprefetch::GSAPrefetchEngineC::ObtainDocsMap) 27 | .def("obtain_blocks_map", &ucmprefetch::GSAPrefetchEngineC::ObtainBlocksMap); 28 | } 29 | } // namespace ucmprefetch 30 | -------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/scripts/submission/prepare_videomme_submission.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pandas as pd 5 | 6 | predict_result_dir = "results/path_to_results" 7 | output_file = "./ReTaKe_videomme_submission.json" 8 | 9 | 10 | videomme_hf_root = "/Video-MME/origin_data" 11 | data_root = "./dataset" 12 | 13 | 14 | ################ DO NOT CHANGE ################ 15 | annos = pd.read_parquet( 16 | os.path.join(videomme_hf_root, "videomme", "test-00000-of-00001.parquet") 17 | ) 18 | with open(os.path.join(predict_result_dir, "generated_predictions.jsonl"), "r") as f: 19 | responses = [json.loads(line) for line in f.readlines()] 20 | 21 | video_id2results = {} 22 | for idx, row in annos.iterrows(): 23 | video_id = row["video_id"] 24 | if video_id in video_id2results: 25 | video_results = video_id2results[video_id] 26 | else: 27 | video_results = dict( 28 | video_id=video_id, 29 | duration=row["duration"], 30 | domain=row["domain"], 31 | sub_category=row["sub_category"], 32 | ) 33 | questions = video_results.get("questions", []) 34 | questions.append( 35 | dict( 36 | question_id=row["question_id"], 37 | task_type=row["task_type"], 38 | question=row["question"], 39 | options=row["options"].tolist(), 40 | answer=row["answer"], 41 | response=responses[idx]["predict"], 42 | ) 43 | ) 44 | video_results["questions"] = questions 45 | video_id2results[video_id] = video_results 46 | 47 | submission_results = [] 48 | for video_results in video_id2results.values(): 49 | submission_results.append(video_results) 50 | 51 | 52 | with open(output_file, "w") as f: 53 | json.dump(submission_results, f, indent=2) 54 | 
-------------------------------------------------------------------------------- /ucm/sandbox/sparse/retake/docs/prepare_longvideobench.md: -------------------------------------------------------------------------------- 1 | ## Prepare LongVideoBench Dataset 2 | 3 | 4 | ### Step 1: Download LongVideoBench dataset from [huggingface](https://huggingface.co/datasets/longvideobench/LongVideoBench) 5 | ```bash 6 | git clone https://huggingface.co/datasets/longvideobench/LongVideoBench 7 | ``` 8 | 9 | Denote the root directory of the downloaded LongVideoBench dataset as `longvideobench_root`; it should have the following structure: 10 | ``` 11 | ${longvideobench_root}/ 12 | ├── subtitles.zip 13 | ├── test-00000-of-00001.parquet 14 | ├── validation-00000-of-00001.parquet 15 | ├── videos.tar.part.aa 16 | ├── ... 17 | └── videos.tar.part.be 18 | ├── ... 19 | ``` 20 | 21 | 22 | ### Step 2: Unzip everything 23 | ```bash 24 | cd ${longvideobench_root} 25 | tar -xvf subtitles.tar 26 | cat videos.tar.part.* > videos.tar 27 | tar -xvf videos.tar 28 | ``` 29 | 30 | 31 | ### Step 3: Extract frames of all videos 32 | ```bash 33 | cd ${retake_root} 34 | python scripts/utils/frame_extraction.py \ 35 | --videofile_tpl ${longvideobench_root}/videos/'*.mp4' \ 36 | --results_dir ${longvideobench_root}/video_25fps \ 37 | --fps 25 \ 38 | --num_workers 32 39 | ``` 40 | 41 | 42 | ### Step 4: Build LongVideoBench dataset 43 | ```bash 44 | cd ${retake_root} 45 | python scripts/utils/build_longvideobench_dataset.py \ 46 | --hf_root ${longvideobench_root} \ 47 | --hf_qwen2vl7b_path ${PATH_TO_Qwen2_VL_7B_Instruct} 48 | ``` 49 | Note that you must NOT modify the folder `${longvideobench_root}/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation files `longvideobench_val.json` and `longvideobench_test.json`: 50 | ``` 51 | retake_root/ 52 | ├── dataset/ 53 | ├── longvideobench/ 54 | ├── longvideobench_val.json 55 | ├── 
longvideobench_test.json 56 | ├── ... 57 | ``` -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/about-codeowners/ 2 | # for more info about CODEOWNERS file 3 | 4 | * @mag1c-h @ygwpz @FangRun2 @Tarrei 5 | /.github @Wwwzff @hek14 @ygwpz @mag1c-h @FangRun2 @Tarrei 6 | 7 | /ucm/sparse @wuhuxiao @wangwenxin0312 @hek14 @ygwpz @mag1c-h 8 | /ucm/sparse/cache_blend @wuhuxiao @hek14 @ygwpz @mag1c-h 9 | /ucm/sparse/esa @wangwenxin0312 @hek14 @ygwpz @mag1c-h 10 | /ucm/sparse/gsa @Zbm1996 @zbb200819 @yxkyong @HaoLi980405 @wuhuxiao @hek14 @ygwpz @mag1c-h 11 | /ucm/sparse/kvcomp @leideng @pengwwang @wuhuxiao @hek14 @ygwpz @mag1c-h 12 | /ucm/sparse/kvstar @saki-daisuki @summer-ai007 @xwLearnsLLM @wuhuxiao @hek14 @ygwpz @mag1c-h 13 | 14 | /ucm/store @mag1c-h @ygwpz 15 | /ucm/store/dramstore @harrisonyhq @mag1c-h @ygwpz 16 | /ucm/store/localstore @mag1c-h @ygwpz 17 | /ucm/store/mooncakestore @chinesezyc @mag1c-h @ygwpz 18 | /ucm/store/nfsstore @mag1c-h @ygwpz 19 | 20 | /ucm/integration @qyh111 @harrisonyhq @ygwpz @mag1c-h @hek14 21 | 22 | /ucm/pd @flesher0813 @ygwpz @mag1c-h 23 | 24 | /ucm/sandbox @Wwwzff @hek14 @ygwpz @mag1c-h @FangRun2 @Tarrei 25 | 26 | /benchmarks @flesher0813 @ygwpz @mag1c-h 27 | 28 | /docker @harrisonyhq @ygwpz @mag1c-h 29 | 30 | /docs @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei @hek14 31 | /docs/source/user-guide/sparse-attention/esa.md @wangwenxin0312 @hek14 @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei 32 | /docs/source/user-guide/sparse-attention/gsa.md @Zbm1996 @zbb200819 @yxkyong @HaoLi980405 @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei 33 | /docs/source/user-guide/sparse-attention/kvcomp.md @leideng @pengwwang @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei 34 | /docs/source/user-guide/sparse-attention/kvstar.md @saki-daisuki @summer-ai007 @flesher0813 @ygwpz @mag1c-h 
@FangRun2 @Tarrei 35 | 36 | /examples @harrisonyhq @ygwpz @mag1c-h @hek14 37 | 38 | /test @Wwwzff @ygwpz @mag1c-h 39 | -------------------------------------------------------------------------------- /ucm/shared/metrics/cc/stats/istats.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
namespace UC::Metrics {

/**
 * @brief Abstract interface of a named statistics collector.
 *
 * Every member function is pure virtual, so this class only fixes the
 * contract; concrete collectors provide the behaviour.
 *
 * NOTE(review): the template arguments of the container types below are not
 * visible in this excerpt, so the key/value types are left exactly as shown.
 */
class IStats {
public:
    virtual ~IStats() = default;
    /** @return The name of this statistics collector. */
    virtual std::string Name() const = 0;
    /** @param params Map of values fed into the collector. */
    virtual void Update(const std::unordered_map& params) = 0;
    /** Reset the collector; the exact effect is defined by the implementation. */
    virtual void Reset() = 0;
    /** @return The collected data as a map (presumably keyed by metric name — confirm). */
    virtual std::unordered_map> Data() = 0;
};

} // namespace UC::Metrics
namespace UC {

/**
 * @brief Latch-based waiter that is armed with an expected count on construction.
 *
 * All behaviour is forwarded to the Latch base class, which is declared in
 * "thread/latch.h" and is not visible here.
 */
class TaskWaiter : public Latch {
public:
    /**
     * @param expected Count passed to Set().
     * @param startTp  Start time point stored in `this->startTp`.
     *                 NOTE(review): `startTp` is not declared in this class, so it
     *                 is presumably a member of Latch — confirm.
     */
    TaskWaiter(const size_t expected, const double startTp) : Latch{}
    {
        this->startTp = startTp;
        Set(expected);
    }
    // Keep the base-class Wait overloads visible; the Wait declared below would
    // otherwise hide them.
    using Latch::Wait;
    /** @return The result of WaitFor(timeoutMs). */
    virtual bool Wait(const size_t timeoutMs) noexcept { return WaitFor(timeoutMs); }
    /** @return The result of Check(). */
    virtual bool Finish() noexcept { return Check(); }
};

} // namespace UC
22 | python collect_env.py 23 | ``` 24 | It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues. 25 | value: | 26 | ```text 27 | The output of above commands 28 | ``` 29 | validations: 30 | required: true 31 | - type: textarea 32 | attributes: 33 | label: How would you like to use ucm. 34 | description: | 35 | A detailed description of how you want to use unifiedcache. 36 | value: | 37 | I want to run inference of a [specific model](put link here). I don't know how to integrate it with unified. 38 | - type: markdown 39 | attributes: 40 | value: > 41 | Thanks for contributing 🎉! 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/200-installation.yml: -------------------------------------------------------------------------------- 1 | name: 🛠️ Installation 2 | description: Report an issue here when you hit errors during installation. 3 | title: "[Installation]: " 4 | labels: ["installation"] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: > 10 | #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc). 11 | - type: textarea 12 | attributes: 13 | label: Your current environment 14 | description: | 15 | Please run the following and paste the output below. 16 | **TODO: Add script to our project to collect the unifiedcache runtime environment, this following example comes from vllm-ascend** 17 | ```sh 18 | npu-smi info 19 | cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info 20 | wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py 21 | # For security purposes, please feel free to check the contents of collect_env.py before running it. 
22 | python collect_env.py 23 | ``` 24 | It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues. 25 | value: | 26 | ```text 27 | The output of `python collect_env.py` 28 | ``` 29 | validations: 30 | required: true 31 | - type: textarea 32 | attributes: 33 | label: How you are installing ucm, also vllm and vllm-ascend. 34 | description: | 35 | Paste the full command you are trying to execute. 36 | value: | 37 | ```sh 38 | pip install -vvv unifiedcache 39 | ``` 40 | - type: markdown 41 | attributes: 42 | value: > 43 | Thanks for contributing 🎉! 44 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/cc/domain/space/space_shard_temp_layout.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
namespace UC {

/**
 * @brief SpaceShardLayout variant that overrides the data-file path queries
 *        and the relative-root list, and adds a TempDataFileRoot() hook.
 *
 * Only declarations are visible here; the definitions live elsewhere.
 */
class SpaceShardTempLayout : public SpaceShardLayout {
public:
    /** @return Parent path of the data file of `blockId`; `activated` selects the variant. */
    std::string DataFileParent(const std::string& blockId, bool activated) const override;
    /** @return Path of the data file of `blockId`; `activated` selects the variant. */
    std::string DataFilePath(const std::string& blockId, bool activated) const override;

protected:
    // NOTE(review): the element type of the returned vector is not visible in
    // this excerpt.
    std::vector RelativeRoots() const override;
    /** @return Root used for temporary data files; virtual, so subclasses may override it. */
    virtual std::string TempDataFileRoot() const;
};

} // namespace UC
namespace UC::Trans {

// Overload set of asynchronous copies issued on `stream`. The overloads differ
// only in whether the source and/or the destination is an array of pointers
// (`void* x[]`) or a single pointer (`void* x`).
// NOTE(review): presumably `number` chunks of `size` bytes each are copied —
// confirm against the kernel implementation, which is not visible here.

// Array of sources -> array of destinations.
cudaError_t CudaSMCopyAsync(void* src[], void* dst[], size_t size, size_t number,
                            cudaStream_t stream);
// Array of sources -> single destination pointer.
cudaError_t CudaSMCopyAsync(void* src[], void* dst, size_t size, size_t number,
                            cudaStream_t stream);
// Single source pointer -> array of destinations.
cudaError_t CudaSMCopyAsync(void* src, void* dst[], size_t size, size_t number,
                            cudaStream_t stream);

} // namespace UC::Trans
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
namespace UC::Trans {

/**
 * @brief CudaStream subclass that overrides the four batched asynchronous
 *        copy entry points.
 *
 * NOTE(review): judging by the name, the overrides presumably dispatch to the
 * SM copy kernels — confirm in the implementation file.
 */
class CudaSmStream : public CudaStream {
public:
    // Device pointer array -> host pointer array.
    Status DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) override;
    // Device pointer array -> single host pointer.
    Status DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) override;
    // Host pointer array -> device pointer array.
    Status HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) override;
    // Single host pointer -> device pointer array.
    Status HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) override;
};

} // namespace UC::Trans
namespace KRepre {
// Thread count requested by the OpenMP loop in ComputeKRepre.
#define OMP_THREAD_NUM 32u

// Returns a VecProductClass instance with thread-local storage duration, so
// each calling thread gets its own instance.
const VecProductClass& KRepreComputer::ThreadLocalVecProduct::GetInstance()
{
    thread_local static VecProductClass instance;
    return instance;
}

// Computes the representation of one block, head by head.
// For head `idxHead` the input starts at kArray + idxHead * blockSize * headSize
// and the output at kRepreBlock + idxHead * headSize.
void KRepreComputer::ComputeKRepreBlock(const float* __restrict kArray,
                                        uint32_t kHead,
                                        uint32_t blockSize,
                                        uint32_t headSize,
                                        float* __restrict kRepreBlock) const
{
    // Fetch the thread-local instance
    const auto& vecProduct = ThreadLocalVecProduct::GetInstance();

    for (uint32_t idxHead = 0; idxHead < kHead; ++idxHead) {
        const float* kArraySingleHead = kArray + idxHead * blockSize * headSize;
        float* kRepreBlockSingleHead = kRepreBlock + idxHead * headSize;

        // NOTE(review): presumably averages `blockSize` vectors of `headSize`
        // elements — confirm in vec_product.h.
        vecProduct.VectorMean(
            kArraySingleHead,
            kRepreBlockSingleHead,
            headSize,
            blockSize
        );
    }
}

// Runs ComputeKRepreBlock for every block index in [0, numBlock), distributing
// the iterations over OMP_THREAD_NUM OpenMP threads. Iteration `idxBlock` reads
// kArray[idxBlock] and writes through kRepreBlockArray[idxBlock].
// NOTE(review): the element types of both vectors are not visible in this
// excerpt.
void KRepreComputer::ComputeKRepre(const std::vector& kArray,
                                   uint32_t numBlock,
                                   uint32_t kHead,
                                   uint32_t blockSize,
                                   uint32_t headSize,
                                   const std::vector& kRepreBlockArray) const
{
    #pragma omp parallel for num_threads(OMP_THREAD_NUM)
    for (uint32_t idxBlock = 0; idxBlock < numBlock; ++idxBlock) {
        const float* kArrayCurrentBlock = kArray[idxBlock];
        float * KRepreCurrentBlock = kRepreBlockArray[idxBlock];

        ComputeKRepreBlock(
            kArrayCurrentBlock,
            kHead,
            blockSize,
            headSize,
            KRepreCurrentBlock
        );
    }
}
}
null, 93 | "kv_lora_rank": null, 94 | "qk_rope_head_dim": null, 95 | "hash_bits_kv_lora": null, 96 | "hash_bits_qk_rope": null, 97 | "hash_weight_kv_lora": null, 98 | "hash_weight_qk_rope": null 99 | } -------------------------------------------------------------------------------- /ucm/store/nfsstore/cc/domain/space/space_property.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
namespace UC {

/**
 * @brief Capacity bookkeeping backed by a property file.
 *
 * Only declarations are visible here. NOTE(review): the private helper names
 * suggest the property lives in shared memory reached through `addr_` —
 * confirm in the implementation file.
 */
class SpaceProperty {
public:
    ~SpaceProperty();
    /** @param propertyFilePath Path of the property file to set up from. */
    Status Setup(const std::string& propertyFilePath);
    /** @param delta Amount added to the tracked capacity. */
    void IncreaseCapacity(const size_t delta);
    /** @param delta Amount subtracted from the tracked capacity. */
    void DecreaseCapacity(const size_t delta);
    /** @return The currently tracked capacity. */
    size_t GetCapacity() const;

private:
    Status InitShmProperty(IFile* shmPropertyFile);
    Status LoadShmProperty(IFile* shmPropertyFile);

private:
    // Starts as nullptr; assigned by code that is not visible here.
    void* addr_{nullptr};
};

} // namespace UC
#include <chrono>

namespace UC {

/**
 * @brief Minimal stopwatch built on std::chrono::steady_clock.
 *
 * The watch starts on construction and can be restarted with Reset().
 * steady_clock is monotonic, so elapsed values are never negative.
 */
class StopWatch {
    using clock = std::chrono::steady_clock;
    // Time point captured at construction or at the last Reset().
    std::chrono::time_point<clock> startTp_;

public:
    /** Start measuring from now. */
    StopWatch() : startTp_{clock::now()} {}
    /** @return Time elapsed since the start point, as floating-point seconds. */
    std::chrono::duration<double> Elapsed() const
    {
        return std::chrono::duration<double>(clock::now() - startTp_);
    }
    /** @return Time elapsed since the start point, truncated to whole milliseconds. */
    std::chrono::milliseconds ElapsedMs() const
    {
        return std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - startTp_);
    }
    /** Restart measuring from now. */
    void Reset() { startTp_ = clock::now(); }
};

} // namespace UC
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "Unified Cache Manager"
copyright = "2025, Unified Cache Manager Team"
author = "Unified Cache Manager Team"
# No release string is set.
release = ""

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# Copy from https://github.com/vllm-project/vllm/blob/main/docs/source/conf.py
extensions = [
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "sphinx_copybutton",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "myst_parser",
    "sphinxarg.ext",
    "sphinx_design",
    "sphinx_togglebutton",
    "sphinx_substitution_extensions",
]

# MyST (Markdown) syntax extensions enabled for the "myst_parser" extension above.
myst_enable_extensions = ["colon_fence", "substitution"]

# templates_path = ['_templates']
# No source files are excluded from the build.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

# The HTML title reuses the project name defined above.
html_title = project
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
html_css_files = ["css/logo.css"]
# Options passed through to the theme selected by `html_theme`.
html_theme_options = {
    "path_to_docs": "docs/source",
    "repository_url": "https://github.com/ModelEngine-Group/unified-cache-management",
    "use_repository_button": True,
    "use_edit_page_button": True,
    "logo": {
        "image_light": "logos/UCM-light.png",
        "image_dark": "logos/UCM-dark.png",
        "alt_text": "UCM",
    },
}

# language = 'zh_CN'
-------------------------------------------------------------------------------- /ucm/shared/metrics/test/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # MIT License 4 | # 5 | # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
import math

import torch

# Baseline values; the *_LEN and *_BLOCKS constants are expressed for
# DEFAULT_BLOCK_SIZE and rescaled by GSAConfig.set_config() for other sizes.
DEFAULT_BLOCK_SIZE = 128
MIN_TOPK_LEN = 32
MAX_TOPK_LEN = 48
MAX_BS = 256
SEG_PREFILL_THRESHOLD = 8400
CUDA_TOPK = False
PTOPK_PREFETCH_ENABLE = False
VLLM_CUDA_MEM_ALIGN_KV_CACHE = False
INIT_WINDOW_SZ = 1
NUM_PREFETCH_BLOCKS = 1
NUM_GSA_BLOCKS = 1


class GSAConfig:
    """Block-size dependent settings for GSA.

    A fresh instance holds the module-level baseline values. Calling
    :meth:`set_config` rescales every count by
    ``DEFAULT_BLOCK_SIZE / block_size`` (rounded up).
    """

    def __init__(self):
        self.block_size = DEFAULT_BLOCK_SIZE
        self.init_windows_size = INIT_WINDOW_SZ
        self.num_prefetch_blocks = NUM_PREFETCH_BLOCKS
        self.min_topk_len = MIN_TOPK_LEN
        self.max_topk_len = MAX_TOPK_LEN
        # Previously only assigned in set_config(), so reading it on a fresh
        # instance raised AttributeError.
        self.num_gsa_blocks = NUM_GSA_BLOCKS

    def set_config(self, block_szie):
        """Rescale all block counts for a new block size.

        Args:
            block_szie: New block size. The (misspelt) parameter name is kept
                so that existing keyword callers keep working.
        """
        self.block_size = block_szie
        # Multiply before dividing, exactly like the baseline formulas, so the
        # ceil() results are not disturbed by floating-point rounding.
        self.min_topk_len = math.ceil(MIN_TOPK_LEN * DEFAULT_BLOCK_SIZE / block_szie)
        self.max_topk_len = math.ceil(MAX_TOPK_LEN * DEFAULT_BLOCK_SIZE / block_szie)
        self.num_prefetch_blocks = math.ceil(
            NUM_PREFETCH_BLOCKS * DEFAULT_BLOCK_SIZE / block_szie
        )
        self.init_windows_size = math.ceil(
            INIT_WINDOW_SZ * DEFAULT_BLOCK_SIZE / block_szie
        )
        self.num_gsa_blocks = math.ceil(
            NUM_GSA_BLOCKS * DEFAULT_BLOCK_SIZE / block_szie
        )

    def compute_topk_len(self, raw_seq_len):
        """Return the top-k length for a sequence of ``raw_seq_len`` units.

        The target is 30% of ``raw_seq_len`` (rounded up). It is raised to
        ``min_topk_len`` (but never above ``raw_seq_len``) and capped at
        ``max_topk_len``.
        """
        topk_len = math.ceil(raw_seq_len * 0.3)
        if topk_len < self.min_topk_len:
            topk_len = min(self.min_topk_len, raw_seq_len)
        elif topk_len > self.max_topk_len:
            topk_len = self.max_topk_len
        return topk_len


gsa_config = GSAConfig()


def round_up(x: int, y: int) -> int:
    """Round ``x`` up to the nearest multiple of ``y``."""
    return ((x + y - 1) // y) * y


def get_type_size(dtype: torch.dtype) -> int:
    """Return the size in bytes of one element of ``dtype``."""
    return torch.tensor([], dtype=dtype).element_size()


def align_to_256bytes(extent: int, dtype: torch.dtype) -> int:
    """Round ``extent`` (an element count) up so that it spans a multiple of 256 bytes."""
    dtype_size = get_type_size(dtype)
    eles_per_256bytes = 256 // dtype_size
    return round_up(extent, eles_per_256bytes)
/**
 * @brief Key representation computer
 *
 * Provides key-representation computation based on the vector mean, with
 * support for multi-threaded parallel computation and SIMD optimization.
 */
class KRepreComputer {
public:
    KRepreComputer() = default;

    /**
     * @brief Copy construction and copy assignment are disabled
     */
    KRepreComputer(const KRepreComputer&) = delete;
    KRepreComputer& operator=(const KRepreComputer&) = delete;

    /**
     * @brief Compute the K representation of a single block
     *
     * @param kArray pointer to the k vectors, laid out as [kHead, blockSize, headSize]
     * @param kHead number of k heads
     * @param blockSize number of k vectors in the block
     * @param headSize vector dimension
     * @param kRepreBlock representation of the single block, [kHead, headSize]
     */
    void ComputeKRepreBlock(const float* __restrict kArray,
                            uint32_t kHead,
                            uint32_t blockSize,
                            uint32_t headSize,
                            float* __restrict kRepreBlock) const;

    /**
     * @brief Compute the K representation of multiple blocks (parallelized with OpenMP)
     *
     * @param kArray pointers to the k vectors, each laid out as [kHead, blockSize, headSize]
     * @param numBlock number of blocks
     * @param kHead number of k heads
     * @param blockSize number of k vectors in a block
     * @param headSize vector dimension
     * @param kRepreBlockArray full K representation, [numBlock, kHead, x, headSize]
     */
    void ComputeKRepre(const std::vector& kArray,
                       uint32_t numBlock,
                       uint32_t kHead,
                       uint32_t blockSize,
                       uint32_t headSize,
                       const std::vector& kRepreBlockArray) const;

private:
    // Manages the thread-local VecProduct instance
    class ThreadLocalVecProduct {
    public:
        static const VecProductClass& GetInstance();
    private:
        ThreadLocalVecProduct() = default;
    };
};
# Optionally download, configure and install numactl at CMake configure time,
# then define NUMA_ENABLED for the targets declared below this directory.
if(BUILD_NUMA)
    message(STATUS "Building numactl library...")

    # Make FetchContent_* available regardless of whether another
    # CMakeLists.txt has already included the module (include() is idempotent).
    include(FetchContent)

    set(NUMA_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/numa_install)
    # NOTE(review): TLS_VERIFY OFF skips certificate validation and no URL_HASH
    # is given, so the archive that ./configure and make run on below is not
    # integrity-checked. Consider enabling TLS_VERIFY and pinning URL_HASH.
    FetchContent_Declare(
        numactl
        URL https://github.com/numactl/numactl/releases/download/v2.0.16/numactl-2.0.16.tar.gz
        TLS_VERIFY OFF
    )
    FetchContent_MakeAvailable(numactl)
    # Only build when the installed shared library is not already present.
    if(NOT EXISTS "${NUMA_INSTALL_DIR}/lib/libnuma.so")
        message(STATUS "Configuring numactl...")
        execute_process(
            COMMAND ./configure --prefix=${NUMA_INSTALL_DIR}
            WORKING_DIRECTORY ${numactl_SOURCE_DIR}
            RESULT_VARIABLE numa_configure_result
            OUTPUT_VARIABLE numa_configure_output
            ERROR_VARIABLE numa_configure_error
        )
        if(NOT numa_configure_result EQUAL 0)
            message(FATAL_ERROR "Failed to configure numactl. \n"
                    "Result: ${numa_configure_result}\n"
                    "STDOUT: ${numa_configure_output}\n"
                    "STDERR: ${numa_configure_error}\n")
        endif()

        message(STATUS "Building and installing numactl...")
        execute_process(
            COMMAND make install -j8
            WORKING_DIRECTORY ${numactl_SOURCE_DIR}
            RESULT_VARIABLE numa_install_result
            OUTPUT_VARIABLE numa_install_output
            ERROR_VARIABLE numa_install_error
        )
        if(NOT numa_install_result EQUAL 0)
            message(FATAL_ERROR "Failed to build and install numactl. \n"
                    "Result: ${numa_install_result}\n"
                    "STDOUT: ${numa_install_output}\n"
                    "STDERR: ${numa_install_error}\n")
        endif()
    else()
        message(STATUS "Found already built libnuma. Skipping build.")
    endif()

    add_definitions(-DNUMA_ENABLED)
else()
    message(STATUS "Skipping numactl build...")
endif()

add_subdirectory(retrieval)
 18 | ├── figs/ 19 | ``` 20 | 21 | Denote the root directory of the downloaded MLVU-Test dataset as `mlvu_test_root`; it should have the following structure: 22 | ``` 23 | ${mlvu_test_root}/ 24 | ├── MLVU_Test/ 25 | ├── test_question.json 26 | ├── test_video.tar.gz.part-aa 27 | ├── test_video.tar.gz.part-ab 28 | ... 29 | ├── figs/ 30 | ├── test_generation_tasks.json 31 | ├── test_multi_choice_tasks.json 32 | ``` 33 | 34 | Unzip MLVU-Test videos: 35 | ```bash 36 | cd MLVU_Test 37 | cat test_video.tar.gz.part-* | tar -xzvf - 38 | ``` 39 | 40 | 41 | ### Step 2: Extract frames of all videos 42 | ```bash 43 | cd ${retake_root} 44 | python scripts/utils/frame_extraction.py \ 45 | --videofile_tpl ${mlvu_root}/MLVU/video/'*/*.mp4' \ 46 | --results_dir ${mlvu_root}/MLVU/video_25fps \ 47 | --fps 25 \ 48 | --num_workers 32 49 | python scripts/utils/frame_extraction.py \ 50 | --videofile_tpl ${mlvu_test_root}/MLVU_Test/video/'*/*.mp4' \ 51 | --results_dir ${mlvu_test_root}/MLVU_Test/video_25fps \ 52 | --fps 25 \ 53 | --num_workers 32 54 | ``` 55 | 56 | 57 | ### Step 3: Build MLVU dataset 58 | ```bash 59 | cd ${retake_root} 60 | python scripts/utils/build_mlvu_dataset.py --hf_root ${mlvu_root} 61 | python scripts/utils/build_mlvu_test_dataset.py --hf_root ${mlvu_test_root} 62 | ``` 63 | Note that you must NOT modify the folders `${mlvu_root}/MLVU/video_25fps` and `${mlvu_test_root}/MLVU_Test/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation files `mlvu.json` and `mlvu_test.json`: 64 | ``` 65 | retake_root/ 66 | ├── dataset/ 67 | ├── mlvu/ 68 | ├── mlvu.json 69 | ├── mlvu_test.json 70 | ├── ... 
# Optionally download, configure and install numactl at CMake configure time,
# then define NUMA_ENABLED for the targets declared below this directory.
if(BUILD_NUMA)
    message(STATUS "Building numactl library...")

    # Make FetchContent_* available regardless of whether another
    # CMakeLists.txt has already included the module (include() is idempotent).
    include(FetchContent)

    set(NUMA_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/numa_install)
    # NOTE(review): TLS_VERIFY OFF skips certificate validation and no URL_HASH
    # is given, so the archive that ./configure and make run on below is not
    # integrity-checked. Consider enabling TLS_VERIFY and pinning URL_HASH.
    FetchContent_Declare(
        numactl
        URL https://github.com/numactl/numactl/releases/download/v2.0.16/numactl-2.0.16.tar.gz
        TLS_VERIFY OFF
    )
    FetchContent_MakeAvailable(numactl)
    # Only build when the installed shared library is not already present.
    if(NOT EXISTS "${NUMA_INSTALL_DIR}/lib/libnuma.so")
        message(STATUS "Configuring numactl...")
        execute_process(
            COMMAND ./configure --prefix=${NUMA_INSTALL_DIR}
            WORKING_DIRECTORY ${numactl_SOURCE_DIR}
            RESULT_VARIABLE numa_configure_result
            OUTPUT_VARIABLE numa_configure_output
            ERROR_VARIABLE numa_configure_error
        )
        if(NOT numa_configure_result EQUAL 0)
            message(FATAL_ERROR "Failed to configure numactl. \n"
                    "Result: ${numa_configure_result}\n"
                    "STDOUT: ${numa_configure_output}\n"
                    "STDERR: ${numa_configure_error}\n")
        endif()

        message(STATUS "Building and installing numactl...")
        execute_process(
            COMMAND make install -j8
            WORKING_DIRECTORY ${numactl_SOURCE_DIR}
            RESULT_VARIABLE numa_install_result
            OUTPUT_VARIABLE numa_install_output
            ERROR_VARIABLE numa_install_error
        )
        if(NOT numa_install_result EQUAL 0)
            message(FATAL_ERROR "Failed to build and install numactl. \n"
                    "Result: ${numa_install_result}\n"
                    "STDOUT: ${numa_install_output}\n"
                    "STDERR: ${numa_install_error}\n")
        endif()
    else()
        message(STATUS "Found already built libnuma. Skipping build.")
    endif()

    add_definitions(-DNUMA_ENABLED)
else()
    message(STATUS "Skipping numactl build...")
endif()

add_subdirectory(hash_retrieval)
def init_logger(name: str = "UNIFIED_CACHE") -> logging.Logger:
    """Return the logger called ``name``, configured for console output.

    The level is read from the ``UNIFIED_CACHE_LOG_LEVEL`` environment variable
    (case-insensitive, surrounding whitespace ignored, default ``INFO``). A
    value that ``logging`` does not recognise falls back to ``INFO`` and is
    reported with a warning, instead of raising ``ValueError`` and aborting
    callers that build their logger at import time.

    A stream handler is attached only when the logger has none yet, so calling
    this repeatedly with the same name does not duplicate output.

    Args:
        name: Name passed to ``logging.getLogger``.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    requested_level = os.getenv("UNIFIED_CACHE_LOG_LEVEL", "INFO").strip().upper()

    logger = logging.getLogger(name)
    try:
        logger.setLevel(requested_level)
        level_rejected = False
    except ValueError:
        # Unknown level name (e.g. a typo or an empty value): keep the default.
        logger.setLevel(logging.INFO)
        level_rejected = True

    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            "[%(asctime)s] - %(name)s - %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )

        handler.setFormatter(formatter)
        logger.addHandler(handler)

    # Emit the warning only after a handler exists so it uses the UCM format.
    if level_rejected:
        logger.warning(
            "Unknown UNIFIED_CACHE_LOG_LEVEL %r; falling back to INFO",
            requested_level,
        )

    return logger
// Abstract store interface: every operation is pure virtual and must be
// supplied by a concrete store. T is the task type accepted by Submit().
template
class CCStore {
    using BlockId = std::string;
    using TaskId = size_t;

public:
    virtual ~CCStore() = default;
    // Single-block operations.
    // NOTE(review): the int32_t results are presumably status codes - confirm
    // against the concrete stores.
    virtual int32_t Alloc(const BlockId& block) = 0;
    virtual bool Lookup(const BlockId& block) = 0;
    virtual void Commit(const BlockId& block, const bool success) = 0;
    // Batch overloads of the three operations above, taking a list of blocks.
    virtual std::list Alloc(const std::list& blocks) = 0;
    virtual std::list Lookup(const std::list& blocks) = 0;
    virtual void Commit(const std::list& blocks, const bool success) = 0;
    // Task operations: Submit() takes the task by rvalue reference and returns
    // the TaskId that Wait() and Check() accept.
    virtual TaskId Submit(T&& task) = 0;
    virtual int32_t Wait(const TaskId task) = 0;
    // `finish` is an output parameter.
    virtual int32_t Check(const TaskId task, bool& finish) = 0;
};
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
namespace UC::Metrics {

// Registers the Python class "StatsMonitor" on module `m`, mapping each
// snake_case Python method to the corresponding StatsMonitor member.
void bind_monitor(py::module_& m)
{
    py::class_(m, "StatsMonitor")
        // reference policy: Python receives a non-owning reference to the
        // object returned by GetInstance().
        .def_static("get_instance", &StatsMonitor::GetInstance, py::return_value_policy::reference)
        .def("update_stats", &StatsMonitor::UpdateStats)
        .def("reset_all", &StatsMonitor::ResetAllStats)
        .def("get_stats", &StatsMonitor::GetStats)
        .def("get_stats_and_clear", &StatsMonitor::GetStatsAndClear);
}

} // namespace UC::Metrics
// Process-wide registry that maps a stats name to the Creator function used
// to build it. Instances cannot be constructed or copied from outside the
// class; access goes through GetInstance().
class StatsRegistry {
public:
    // Returns the single registry instance.
    static StatsRegistry& GetInstance();

    // Stores `creator` under `name` in the shared registry.
    static void RegisterStats(std::string name, Creator creator);

    // Builds the stats object registered under `name`.
    std::unique_ptr CreateStats(const std::string& name);

    // Returns the names that have been registered.
    std::vector GetRegisteredStatsNames();

private:
    StatsRegistry() = default;
    ~StatsRegistry() = default;
    StatsRegistry(const StatsRegistry&) = delete;
    StatsRegistry& operator=(const StatsRegistry&) = delete;

    // mutex_ is the lock taken around accesses to registry_.
    std::mutex mutex_;
    std::unordered_map registry_;
};
// Creates a stream and runs its Setup(). Returns nullptr when construction
// throws or when Setup() does not report success, so callers only ever see a
// fully set-up stream or nullptr.
std::unique_ptr Device::MakeStream()
{
    std::unique_ptr stream = nullptr;
    try {
        stream = std::make_unique();
    } catch (...) {
        // Any construction failure is reported as nullptr, not propagated.
        return nullptr;
    }
    if (stream->Setup().Success()) { return stream; }
    return nullptr;
}
class UcmSparseFactory:
    """Registry of sparse attention methods that are imported lazily by name."""

    # Maps a method name to a zero-argument loader that imports the module and
    # returns the implementing class.
    _registry: dict[str, Callable[[], type[UcmSparseBase]]] = {}

    @classmethod
    def register_sparse_method(
        cls, name: str, module_path: str, class_name: str
    ) -> None:
        """Register a sparse attention method with a lazy-loading module and class name.

        Args:
            name: Key under which the method is looked up.
            module_path: Dotted path of the module that defines the class.
            class_name: Attribute name of the class inside that module.

        Raises:
            ValueError: If ``name`` has already been registered.
        """
        if name in cls._registry:
            raise ValueError(f"Sparse attention method '{name}' is already registered.")

        def loader() -> type[UcmSparseBase]:
            # The import is deferred until the method is actually requested.
            module = importlib.import_module(module_path)
            return getattr(module, class_name)

        cls._registry[name] = loader

    @classmethod
    def create_sparse_method(
        cls, config: "VllmConfig", role: UcmSparseRole
    ) -> UcmSparseBase:
        """Instantiate the sparse method named by the UCM configuration.

        The method name is the first key of the ``ucm_sparse_config`` mapping
        obtained from ``config.kv_transfer_config``.

        Args:
            config: vLLM configuration carrying ``kv_transfer_config``.
            role: Role passed through to the sparse method constructor.

        Returns:
            An instance of the registered sparse method class.

        Raises:
            ValueError: If ``ucm_sparse_config`` is missing or empty, or if it
                names a method that has not been registered.
        """
        ucm_config = Config(config.kv_transfer_config)
        ucm_cfg = ucm_config.get_config().get("ucm_sparse_config")
        # Without this guard a missing section fails with AttributeError and an
        # empty one with StopIteration, neither of which names the real problem.
        if not ucm_cfg:
            raise ValueError(
                "'ucm_sparse_config' is missing or empty; cannot select a sparse method."
            )

        sparse_method_name, _ = next(iter(ucm_cfg.items()))
        loader = cls._registry.get(sparse_method_name)
        if loader is None:
            raise ValueError(f"Unsupported sparse method type: {sparse_method_name}")
        sparse_method_cls = loader()
        assert issubclass(sparse_method_cls, UcmSparseBase)
        logger.info("Creating sparse method with name: %s", sparse_method_name)
        return sparse_method_cls(config, role)
UcmSparseFactory.register_sparse_method("KvComp", "ucm.sparse.kvcomp.kvcomp", "KvComp") 50 | UcmSparseFactory.register_sparse_method("GSA", "ucm.sparse.gsa.gsa", "GSA") 51 | UcmSparseFactory.register_sparse_method( 52 | "KVStarMultiStep", "ucm.sparse.kvstar.multistep", "KVStarMultiStep" 53 | ) 54 | UcmSparseFactory.register_sparse_method("Blend", "ucm.sparse.blend.blend", "Blend") 55 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/cc/domain/space/space_layout.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
// Abstract interface for the path layout of a store: every operation is pure
// virtual and must be supplied by a concrete layout.
class SpaceLayout {
public:
    // Opaque iteration state; only declared here, defined by implementations.
    struct DataIterator;
public:
    virtual ~SpaceLayout() = default;
    // Initializes the layout from the given storage backends.
    virtual Status Setup(const std::vector& storageBackends) = 0;
    // Parent location and full path of the data file for `blockId`.
    // NOTE(review): `activated` presumably selects between two file states -
    // confirm against the concrete layouts.
    virtual std::string DataFileParent(const std::string& blockId, bool activated) const = 0;
    virtual std::string DataFilePath(const std::string& blockId, bool activated) const = 0;
    virtual std::string ClusterPropertyFilePath() const = 0;
    // Iteration over data file paths: create an iterator, then pass it to
    // NextDataFilePath() to obtain a path.
    virtual std::shared_ptr CreateFilePathIterator() const = 0;
    virtual std::string NextDataFilePath(std::shared_ptr iter) const = 0;
    virtual bool IsActivatedFile(const std::string& filePath) const = 0;
};
19 | # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Install googletest 26 | run: | 27 | git clone https://github.com/google/googletest.git --depth=1 --branch=v1.17.0 28 | cd googletest 29 | mkdir build && cd build 30 | cmake -DCMAKE_CXX_FLAGS="-fPIC" -DCMAKE_C_FLAGS="-fPIC" -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_STANDARD_REQUIRED=True .. 31 | sudo make install -j 32 | 33 | - name: Configure CMake 34 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 35 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 36 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBUILD_UCM_SPARSE=OFF -DBUILD_UNIT_TESTS=ON -DRUNTIME_ENVIRONMENT=simu 37 | 38 | - name: Build 39 | # Build your program with the given configuration 40 | run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} -j 41 | 42 | - name: Test 43 | working-directory: ${{github.workspace}}/build 44 | # Execute tests defined by the CMake configuration. 45 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 46 | run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure 47 | -------------------------------------------------------------------------------- /ucm/shared/metrics/cc/stats_registry.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
namespace UC::Metrics {

// Returns the single registry, a function-local static created on first use.
StatsRegistry& StatsRegistry::GetInstance()
{
    static StatsRegistry inst;
    return inst;
}

// Stores `creator` under `name` while holding the registry mutex. An existing
// entry with the same name is overwritten.
void StatsRegistry::RegisterStats(std::string name, Creator creator)
{
    auto& reg = GetInstance();
    std::lock_guard lk(reg.mutex_);
    reg.registry_[name] = creator;
}

// Invokes the creator registered under `name` and returns its result, or
// nullptr when no such name is registered. Works on the singleton obtained
// from GetInstance() rather than on `this`.
std::unique_ptr StatsRegistry::CreateStats(const std::string& name)
{
    auto& reg = GetInstance();
    std::lock_guard lk(reg.mutex_);
    if (auto it = reg.registry_.find(name); it != reg.registry_.end()) return it->second();
    return nullptr;
}

// Returns a copy of all registered names, taken under the registry mutex.
std::vector StatsRegistry::GetRegisteredStatsNames()
{
    auto& reg = GetInstance();
    std::lock_guard lk(reg.mutex_);
    std::vector names;
    names.reserve(reg.registry_.size());
    for (auto& [n, _] : reg.registry_) names.push_back(n);
    return names;
}

} // namespace UC::Metrics
// TaskManager specialization whose Setup() creates one queue per stream.
class TransManager : public TaskManager {
public:
    // Records `timeoutMs`, then creates and sets up `streamNumber` queues with
    // the same device, I/O size, buffer count, layout, timeout and direct-I/O
    // flag; each queue also receives a pointer to this manager's failure set.
    //
    // Returns OK when every queue was set up, otherwise the status of the
    // first queue that failed. Setup stops at that queue; queues that had
    // already succeeded stay in queues_.
    Status Setup(const int32_t deviceId, const size_t streamNumber, const size_t ioSize,
                 const size_t bufferNumber, const SpaceLayout* layout, const size_t timeoutMs,
                 bool useDirect = false)
    {
        this->timeoutMs_ = timeoutMs;
        auto status = Status::OK();
        for (size_t i = 0; i < streamNumber; i++) {
            auto q = std::make_shared();
            status = q->Setup(deviceId, ioSize, bufferNumber, &this->failureSet_, layout, timeoutMs,
                              useDirect);
            if (status.Failure()) { break; }
            // Only successfully set-up queues are kept.
            this->queues_.emplace_back(std::move(q));
        }
        return status;
    }
};
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | 25 | #ifndef UNIFIEDCACHE_HOTNESS_TIMER_H 26 | #define UNIFIEDCACHE_HOTNESS_TIMER_H 27 | #include 28 | #include 29 | #include "logger/logger.h" 30 | #include "template/timer.h" 31 | 32 | namespace UC { 33 | /* Owns a timer object that is built from a stored interval and a caller-supplied callable. NOTE(review): include targets and template arguments seem to be missing from this listing; check the original file. */ 34 | class HotnessTimer { 35 | public: /* SetInterval stores the given count as whole seconds in interval_. */ 36 | void SetInterval(const size_t interval) { this->interval_ = std::chrono::seconds(interval); } /* Start builds the timer from interval_ and the moved callable, then starts it. Any std::exception thrown while building is logged with UC_ERROR and reported as Status::OutOfMemory(). Otherwise the result is Status::OK() when timer_->Start() evaluates to true and Status::Error() when it does not. */ 37 | Status Start(std::function callable) 38 | { 39 | try { 40 | this->timer_ = std::make_unique>>(this->interval_, 41 | std::move(callable)); 42 | } catch (const std::exception& e) { 43 | UC_ERROR("Failed({}) to start hotness timer.", e.what()); 44 | return Status::OutOfMemory(); 45 | } 46 | return this->timer_->Start() ? 
Status::OK() : Status::Error(); 47 | } 48 | /* interval_ is declared without an initializer. NOTE(review): presumably SetInterval is expected to run before Start - confirm with callers. timer_ is assigned only inside Start. */ 49 | private: 50 | std::chrono::seconds interval_; 51 | std::unique_ptr>> timer_; 52 | }; 53 | 54 | } // namespace UC 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /ucm/store/pcstore/cc/domain/space/space_layout.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | * */ 24 | #ifndef UNIFIEDCACHE_SPACE_LAYOUT_H 25 | #define UNIFIEDCACHE_SPACE_LAYOUT_H 26 | 27 | #include 28 | #include 29 | #include "status/status.h" 30 | 31 | namespace UC { 32 | /* Declaration only: every member function is defined outside this header. NOTE(review): include targets and the std::vector element types seem to be missing from this listing, and the remarks below are read off names and signatures - verify against the implementation file. */ 33 | class SpaceLayout { 34 | public: /* Setup receives the list of storage backends. DataFilePath returns a path string for a block id and an activated flag. Commit takes a block id and a success flag and returns a Status. */ 35 | Status Setup(const std::vector& storageBackends); 36 | std::string DataFilePath(const std::string& blockId, bool activated) const; 37 | Status Commit(const std::string& blockId, bool success) const; 38 | /* Private helpers. ShardBlockId hands its two results back through the front and back reference parameters instead of a return value. */ 39 | private: 40 | std::vector RelativeRoots() const; 41 | Status AddStorageBackend(const std::string& path); 42 | Status AddFirstStorageBackend(const std::string& path); 43 | Status AddSecondaryStorageBackend(const std::string& path); 44 | std::string StorageBackend(const std::string& blockId) const; 45 | std::string DataFileRoot() const; 46 | std::string TempFileRoot() const; 47 | void ShardBlockId(const std::string& blockId, uint64_t& front, uint64_t& back) const; 48 | /* The only data member; presumably the backends accepted by Setup - confirm in the implementation. */ 49 | private: 50 | std::vector storageBackends_; 51 | }; 52 | 53 | } // namespace UC 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /ucm/shared/trans/ascend/ascend_device.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * */ 24 | #include 25 | #include "ascend_buffer.h" 26 | #include "ascend_stream.h" 27 | #include "trans/device.h" 28 | 29 | namespace UC::Trans { 30 | /* NOTE(review): the first include target and the template arguments of std::unique_ptr and std::make_unique seem to be missing from this listing; check the original file. */ /* Setup rejects a negative device id with Status::Error and a formatted message, then calls aclrtSetDevice. Returns Status::OK() when that call yields ACL_SUCCESS; otherwise returns a Status built from the return code and its decimal string. */ 31 | Status Device::Setup(int32_t deviceId) 32 | { 33 | if (deviceId < 0) { return Status::Error(fmt::format("invalid device id({})", deviceId)); } 34 | auto ret = aclrtSetDevice(deviceId); 35 | if (ret == ACL_SUCCESS) { return Status::OK(); } 36 | return Status{ret, std::to_string(ret)}; 37 | } 38 | /* MakeStream constructs a stream object and calls its Setup. Returns the stream when Setup reports Success(); returns nullptr when construction throws anything or when Setup does not succeed. */ 39 | std::unique_ptr Device::MakeStream() 40 | { 41 | std::unique_ptr stream = nullptr; 42 | try { 43 | stream = std::make_unique(); 44 | } catch (...) { 45 | return nullptr; 46 | } 47 | if (stream->Setup().Success()) { return stream; } 48 | return nullptr; 49 | } 50 | /* MakeSMStream always returns nullptr in this file. */ 51 | std::unique_ptr Device::MakeSMStream() { return nullptr; } 52 | /* MakeBuffer returns a newly constructed buffer object, or nullptr when construction throws anything. */ 53 | std::unique_ptr Device::MakeBuffer() 54 | { 55 | try { 56 | return std::make_unique(); 57 | } catch (...) { 58 | return nullptr; 59 | } 60 | } 61 | 62 | } // namespace UC::Trans 63 | -------------------------------------------------------------------------------- /ucm/store/nfsstore/cc/domain/space/space_recycle.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MIT License 3 | * 4 | * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | * */ 24 | #ifndef UNIFIEDCACHE_SPACE_RECYCLE_H 25 | #define UNIFIEDCACHE_SPACE_RECYCLE_H 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "space_layout.h" 33 | 34 | namespace UC { 35 | /* Declaration only: the destructor, Setup, Trigger and Recycler are defined outside this header. The class cannot be copied because the copy constructor and copy assignment are deleted. NOTE(review): include targets and the std::function signature seem to be missing from this listing. The mutex, condition variable and thread members suggest that worker_ runs Recycler() in the background - confirm in the implementation file. */ 36 | class SpaceRecycle { 37 | public: /* RecycleOneBlockDone is the callback type accepted by Setup and held in recycleOneBlockDone_. */ 38 | using RecycleOneBlockDone = std::function; 39 | SpaceRecycle() = default; 40 | SpaceRecycle(const SpaceRecycle&) = delete; 41 | SpaceRecycle& operator=(const SpaceRecycle&) = delete; 42 | ~SpaceRecycle(); 43 | Status Setup(const SpaceLayout* layout, const size_t totalNumber, 44 | RecycleOneBlockDone done); 45 | void Trigger(); 46 | private: 47 | void Recycler(); 48 | private: /* Defaults given here: both plain flags and the atomic flag start false, recycleNum_ starts at 0 and layout_ starts as nullptr. */ 49 | bool stop_{false}; 50 | bool recycling_{false}; 51 | std::atomic_bool serviceRunning_{false}; 52 | uint32_t recycleNum_{0}; 53 | RecycleOneBlockDone recycleOneBlockDone_; 54 | const SpaceLayout* layout_{nullptr}; 55 | std::mutex mtx_; 56 | std::condition_variable cv_; 57 | std::thread worker_; 58 | }; 59 | 60 | } // namespace UC 61 | #endif --------------------------------------------------------------------------------