├── requirements.txt
├── ucm
    ├── pd
    │   └── __init__.py
    ├── shared
    │   ├── __init__.py
    │   ├── trans
    │   │   ├── __init__.py
    │   │   ├── simu
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── simu_buffer.h
    │   │   │   └── simu_device.cc
    │   │   ├── ascend
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── ascend_buffer.h
    │   │   │   └── ascend_device.cc
    │   │   ├── cuda
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── cuda_buffer.h
    │   │   │   ├── cuda_sm_kernel.h
    │   │   │   └── cuda_sm_stream.h
    │   │   ├── CMakeLists.txt
    │   │   ├── maca
    │   │   │   └── CMakeLists.txt
    │   │   ├── device.h
    │   │   └── buffer.h
    │   ├── metrics
    │   │   ├── __init__.py
    │   │   ├── CMakeLists.txt
    │   │   ├── cc
    │   │   │   ├── stats
    │   │   │   │   └── istats.h
    │   │   │   ├── stats_registry.h
    │   │   │   └── stats_registry.cc
    │   │   ├── test
    │   │   │   └── test.py
    │   │   └── cpy
    │   │   │   └── metrics.py.cc
    │   ├── CMakeLists.txt
    │   ├── test
    │   │   └── CMakeLists.txt
    │   ├── infra
    │   │   ├── CMakeLists.txt
    │   │   ├── time
    │   │   │   ├── now_time.h
    │   │   │   └── stopwatch.h
    │   │   └── template
    │   │   │   └── singleton.h
    │   └── vendor
    │   │   └── CMakeLists.txt
    ├── sparse
    │   ├── __init__.py
    │   ├── blend
    │   │   └── __init__.py
    │   ├── esa
    │   │   ├── __init__.py
    │   │   ├── retrieval
    │   │   │   ├── __init__.py
    │   │   │   └── CMakeLists.txt
    │   │   └── CMakeLists.txt
    │   ├── gsa
    │   │   ├── __init__.py
    │   │   ├── offload_ops
    │   │   │   ├── __init__.py
    │   │   │   ├── include
    │   │   │   │   ├── thread_safe_queue.h
    │   │   │   │   └── k_repre.h
    │   │   │   └── src
    │   │   │   │   ├── thread_safe_queue.cpp
    │   │   │   │   ├── pybinds.cpp
    │   │   │   │   └── k_repre.cpp
    │   │   ├── prefetch
    │   │   │   ├── __init__.py
    │   │   │   └── src
    │   │   │   │   └── pybinds.cpp
    │   │   └── CMakeLists.txt
    │   ├── kvcomp
    │   │   ├── .gitkeep
    │   │   ├── __init__.py
    │   │   ├── hash_retrieval
    │   │   │   ├── __init__.py
    │   │   │   └── CMakeLists.txt
    │   │   ├── figs
    │   │   │   ├── kvcomp_scheme.jpg
    │   │   │   ├── kvcomp_longbench.jpg
    │   │   │   ├── kvcomp_end_to_end_performance.jpg
    │   │   │   └── kvcomp_single_layer_performance.jpg
    │   │   ├── paper
    │   │   │   └── kvcomp-ACL-2025-paper.pdf
    │   │   ├── configs
    │   │   │   ├── kvcomp_deepseek_v2_lite_config.json
    │   │   │   └── kvcomp_qwen3_4B_config.json
    │   │   └── CMakeLists.txt
    │   ├── kvstar
    │   │   ├── .gitkeep
    │   │   ├── __init__.py
    │   │   ├── retrieve
    │   │   │   ├── __init__.py
    │   │   │   ├── core
    │   │   │   │   ├── domain
    │   │   │   │   │   └── retrieve_task
    │   │   │   │   │   │   ├── task_status.h
    │   │   │   │   │   │   ├── simd_compute_kernel.h
    │   │   │   │   │   │   ├── computation_task.h
    │   │   │   │   │   │   ├── retrieve_task_runner.h
    │   │   │   │   │   │   ├── task_result.h
    │   │   │   │   │   │   ├── retrieve_task_waiter.h
    │   │   │   │   │   │   ├── retrieve_task_runner.cpp
    │   │   │   │   │   │   ├── retrieve_task_manager.h
    │   │   │   │   │   │   ├── retrieve_task_queue.h
    │   │   │   │   │   │   ├── retrieve_task_set.h
    │   │   │   │   │   │   └── retrieve_task.h
    │   │   │   │   ├── infra
    │   │   │   │   │   ├── template
    │   │   │   │   │   │   └── singleton.h
    │   │   │   │   │   ├── memory
    │   │   │   │   │   │   ├── memory.cpp
    │   │   │   │   │   │   └── memory.h
    │   │   │   │   │   ├── logger
    │   │   │   │   │   │   ├── logger.h
    │   │   │   │   │   │   └── logger.cpp
    │   │   │   │   │   └── thread
    │   │   │   │   │   │   └── latch.h
    │   │   │   │   ├── CMakeLists.txt
    │   │   │   │   └── api
    │   │   │   │   │   └── kvstar_retrieve
    │   │   │   │   │       ├── kvstar_retrieve.h
    │   │   │   │   │       └── kvstar_retrieve.cpp
    │   │   │   └── py_intf
    │   │   │   │   └── CMakeLists.txt
    │   │   └── CMakeLists.txt
    │   ├── CMakeLists.txt
    │   ├── utils.py
    │   └── factory.py
    ├── store
    │   ├── __init__.py
    │   ├── nfsstore
    │   │   ├── __init__.py
    │   │   ├── device
    │   │   │   ├── simu
    │   │   │   │   └── CMakeLists.txt
    │   │   │   ├── musa
    │   │   │   │   └── CMakeLists.txt
    │   │   │   ├── cuda
    │   │   │   │   └── CMakeLists.txt
    │   │   │   ├── ascend
    │   │   │   │   └── CMakeLists.txt
    │   │   │   ├── CMakeLists.txt
    │   │   │   └── maca
    │   │   │   │   └── CMakeLists.txt
    │   │   ├── CMakeLists.txt
    │   │   └── cc
    │   │   │   └── domain
    │   │   │       ├── hotness
    │   │   │           ├── hotness_set.h
    │   │   │           └── hotness_timer.h
    │   │   │       ├── space
    │   │   │           ├── space_shard_temp_layout.h
    │   │   │           ├── space_property.h
    │   │   │           ├── space_layout.h
    │   │   │           └── space_recycle.h
    │   │   │       └── trans
    │   │   │           └── trans_manager.h
    │   ├── pcstore
    │   │   ├── __init__.py
    │   │   ├── CMakeLists.txt
    │   │   └── cc
    │   │   │   └── domain
    │   │   │       └── space
    │   │   │           ├── space_manager.h
    │   │   │           └── space_layout.h
    │   ├── mooncakestore
    │   │   ├── CMakeLists.txt
    │   │   └── __init__.py
    │   ├── test
    │   │   └── CMakeLists.txt
    │   ├── detail
    │   │   ├── CMakeLists.txt
    │   │   └── task
    │   │   │   ├── task_set.h
    │   │   │   ├── task_queue.h
    │   │   │   └── task_waiter.h
    │   ├── CMakeLists.txt
    │   └── ucmstore.h
    ├── integration
    │   ├── __init__.py
    │   └── vllm
    │   │   ├── __init__.py
    │   │   └── patch
    │   │       ├── __init__.py
    │   │       └── patch_funcs
    │   │           ├── __init__.py
    │   │           └── v092
    │   │               └── __init__.py
    ├── sandbox
    │   ├── agentic_ai
    │   │   └── README.md
    │   └── sparse
    │   │   └── retake
    │   │       ├── .gitkeep
    │   │       ├── .gitignore
    │   │       ├── misc
    │   │           └── flexreduc_pipeline.png
    │   │       ├── requirements.txt
    │   │       ├── scripts
    │   │           ├── infer_eval.sh
    │   │           └── submission
    │   │           │   ├── prepare_lvbench_submission.py
    │   │           │   └── prepare_videomme_submission.py
    │   │       ├── configs
    │   │           ├── qwen2_vl
    │   │           │   ├── qwen2-vl_mlvu.yaml
    │   │           │   ├── qwen2-vl_lvbench.yaml
    │   │           │   ├── qwen2-vl_videomme.yaml
    │   │           │   ├── retake_qwen2-vl_videomme.yaml
    │   │           │   ├── retake_qwen2-vl_mlvu.yaml
    │   │           │   └── retake_qwen2-vl_lvbench.yaml
    │   │           ├── llava_video
    │   │           │   ├── llava-video_mlvu.yaml
    │   │           │   ├── llava-video_lvbench.yaml
    │   │           │   ├── llava-video_videomme.yaml
    │   │           │   ├── retake_llava-video_lvbench.yaml
    │   │           │   ├── retake_llava-video_mlvu.yaml
    │   │           │   └── retake_llava-video_videomme.yaml
    │   │           ├── qwen2_5_vl
    │   │           │   ├── qwen2-5-vl_videomme_f256.yaml
    │   │           │   ├── flexreduc_qwen2-5-vl_mlvu.yaml
    │   │           │   ├── flexreduc_qwen2-5-vl_lvbench.yaml
    │   │           │   ├── flexreduc_qwen2-5-vl_videomme.yaml
    │   │           │   └── flexreduc_qwen2-5-vl_longvideobench.yaml
    │   │           ├── demo.yaml
    │   │           └── demo_npu.yaml
    │   │       ├── environment_npu.yaml
    │   │       └── docs
    │   │           ├── prepare_videomme.md
    │   │           ├── prepare_lvbench.md
    │   │           ├── prepare_longvideobench.md
    │   │           └── prepare_mlvu.md
    ├── CMakeLists.txt
    ├── __init__.py
    └── logger.py
├── benchmarks
    └── .gitkeep
├── test
    ├── CMakeLists.txt
    ├── common
    │   ├── __init__.py
    │   ├── llmperf
    │   │   ├── __init__.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── common_metrics.py
    │   │   │   └── models.py
    │   ├── envPreCheck
    │   │   └── __init__.py
    │   └── doc
    │   │   └── LLMPerf.md
    ├── .gitignore
    ├── requirements.txt
    ├── pytest.ini
    ├── config.yaml
    └── suites
    │   └── E2E
    │       └── test_evaluator.py
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── config.yml
    │   ├── 100-documentation.yml
    │   ├── 800-others.yml
    │   ├── 110-user-story.yml
    │   ├── 500-feature-request.yml
    │   ├── 600-new-model.yml
    │   ├── 300-usage.yml
    │   └── 200-installation.yml
    ├── actionlint.yaml
    ├── workflows
    │   ├── matchers
    │   │   ├── mypy.json
    │   │   ├── ruff.json
    │   │   └── actionlint.json
    │   ├── unifiedcache_test.yml
    │   ├── pre-commit.yml
    │   ├── cpp-linter.yml
    │   ├── e2e_test.yml
    │   └── ucmstore.yml
    ├── PULL_REQUEST_TEMPLATE.md
    └── CODEOWNERS
├── MANIFEST.in
├── docs
    ├── source
    │   ├── _static
    │   │   ├── css
    │   │   │   └── logo.css
    │   │   ├── images
    │   │   │   ├── idea.png
    │   │   │   ├── GSA_overview.png
    │   │   │   ├── architecture.png
    │   │   │   ├── blend_scheme.jpg
    │   │   │   ├── kvcomp_scheme.jpg
    │   │   │   ├── kvstar_diagram.png
    │   │   │   ├── prefix_cache.jpg
    │   │   │   ├── ucconn_ucmconn.png
    │   │   │   ├── kvcomp_longbench.jpg
    │   │   │   ├── kvstar_retrieve.png
    │   │   │   ├── nfs_performance.png
    │   │   │   ├── sparse_attn_arch.png
    │   │   │   ├── attention_overhead.png
    │   │   │   ├── attention_sparsity.png
    │   │   │   ├── pd_disaggregation.jpg
    │   │   │   ├── qrcode_for_wechat.png
    │   │   │   ├── mooncake_performance.png
    │   │   │   ├── GSA-E2E-offload-throughput.png
    │   │   │   ├── esa_async_retrieval_and_load.png
    │   │   │   ├── kvcomp_end_to_end_performance.jpg
    │   │   │   ├── GSA-E2E-non-offload-throughput.png
    │   │   │   └── kvcomp_single_layer_performance.jpg
    │   │   └── paper
    │   │   │   └── kvcomp-ACL-2025-paper.pdf
    │   ├── logos
    │   │   ├── UCM-dark.png
    │   │   └── UCM-light.png
    │   ├── about.md
    │   ├── developer-guide
    │   │   └── contribute.md
    │   └── conf.py
    ├── start.cmd
    ├── requirements-docs.txt
    ├── README.md
    ├── Makefile
    └── make.bat
├── requirements-lint.txt
├── docker
    ├── Dockerfile
    └── Dockerfile-NPU
├── .readthedocs.yaml
├── .pre-commit-config.yaml
├── pyproject.toml
├── format.sh
├── .gitignore
├── .clang-format
├── LICENSE
├── examples
    └── ucm_config_example.yaml
└── CMakeLists.txt


/requirements.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/pd/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/benchmarks/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/shared/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/store/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/integration/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/blend/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/esa/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/common/llmperf/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/integration/vllm/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/shared/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/store/pcstore/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/common/envPreCheck/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/common/llmperf/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/integration/vllm/patch/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sandbox/agentic_ai/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/esa/retrieval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/offload_ops/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/prefetch/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/store/mooncakestore/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/store/mooncakestore/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/hash_retrieval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/integration/vllm/patch/patch_funcs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/integration/vllm/patch/patch_funcs/v092/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(retrieve)
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/.gitignore:
--------------------------------------------------------------------------------
1 | /dataset
2 | /results
3 | */__pycache__


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include CMakeLists.txt
2 | graft ucm
3 | graft examples
4 | graft benchmarks
5 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(offload_ops)
2 | add_subdirectory(prefetch)
3 | 


--------------------------------------------------------------------------------
/ucm/store/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | if(BUILD_UNIT_TESTS)
2 |     include(GoogleTest)
3 | endif()
4 | 


--------------------------------------------------------------------------------
/docs/source/_static/css/logo.css:
--------------------------------------------------------------------------------
1 | .navbar-brand img {
2 |     max-width: 180px;
3 |     height: auto;
4 | }


--------------------------------------------------------------------------------
/docs/source/logos/UCM-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/logos/UCM-dark.png


--------------------------------------------------------------------------------
/ucm/sparse/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(esa)
2 | add_subdirectory(gsa)
3 | add_subdirectory(kvcomp)
4 | add_subdirectory(kvstar)
5 | 


--------------------------------------------------------------------------------
/docs/source/logos/UCM-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/logos/UCM-light.png


--------------------------------------------------------------------------------
/docs/source/_static/images/idea.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/idea.png


--------------------------------------------------------------------------------
/.github/actionlint.yaml:
--------------------------------------------------------------------------------
1 | self-hosted-runner:
2 |   # Labels of self-hosted runners, as an array of strings.
3 |   labels:
4 |     - default
5 |     - arc-runner-ucm


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/figs/kvcomp_scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_scheme.jpg


--------------------------------------------------------------------------------
/ucm/store/nfsstore/device/simu/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(storedevice STATIC simu_device.cc)
2 | target_link_libraries(storedevice PUBLIC infra_status)
3 | 


--------------------------------------------------------------------------------
/docs/source/_static/images/GSA_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/GSA_overview.png


--------------------------------------------------------------------------------
/docs/source/_static/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/architecture.png


--------------------------------------------------------------------------------
/docs/source/_static/images/blend_scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/blend_scheme.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/kvcomp_scheme.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_scheme.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/kvstar_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvstar_diagram.png


--------------------------------------------------------------------------------
/docs/source/_static/images/prefix_cache.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/prefix_cache.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/ucconn_ucmconn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/ucconn_ucmconn.png


--------------------------------------------------------------------------------
/ucm/shared/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(vendor)
2 | add_subdirectory(infra)
3 | add_subdirectory(trans)
4 | add_subdirectory(metrics)
5 | add_subdirectory(test)
6 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/figs/kvcomp_longbench.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_longbench.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/kvcomp_longbench.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_longbench.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/kvstar_retrieve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvstar_retrieve.png


--------------------------------------------------------------------------------
/docs/source/_static/images/nfs_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/nfs_performance.png


--------------------------------------------------------------------------------
/docs/source/_static/images/sparse_attn_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/sparse_attn_arch.png


--------------------------------------------------------------------------------
/requirements-lint.txt:
--------------------------------------------------------------------------------
1 | # formatting
2 | pre-commit==4.0.1
3 | 
4 | # type checking
5 | mypy==1.11.1
6 | types-PyYAML
7 | types-regex
8 | types-requests
9 | types-setuptools


--------------------------------------------------------------------------------
/docs/source/_static/images/attention_overhead.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/attention_overhead.png


--------------------------------------------------------------------------------
/docs/source/_static/images/attention_sparsity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/attention_sparsity.png


--------------------------------------------------------------------------------
/docs/source/_static/images/pd_disaggregation.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/pd_disaggregation.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/qrcode_for_wechat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/qrcode_for_wechat.png


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/paper/kvcomp-ACL-2025-paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/paper/kvcomp-ACL-2025-paper.pdf


--------------------------------------------------------------------------------
/docs/source/_static/images/mooncake_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/mooncake_performance.png


--------------------------------------------------------------------------------
/docs/source/_static/paper/kvcomp-ACL-2025-paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/paper/kvcomp-ACL-2025-paper.pdf


--------------------------------------------------------------------------------
/ucm/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(shared)
2 | if(BUILD_UCM_STORE)
3 |     add_subdirectory(store)
4 | endif()
5 | if(BUILD_UCM_SPARSE)
6 |     add_subdirectory(sparse)
7 | endif()
8 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/misc/flexreduc_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sandbox/sparse/retake/misc/flexreduc_pipeline.png


--------------------------------------------------------------------------------
/docs/source/_static/images/GSA-E2E-offload-throughput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/GSA-E2E-offload-throughput.png


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/figs/kvcomp_end_to_end_performance.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_end_to_end_performance.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/esa_async_retrieval_and_load.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/esa_async_retrieval_and_load.png


--------------------------------------------------------------------------------
/docs/source/_static/images/kvcomp_end_to_end_performance.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_end_to_end_performance.jpg


--------------------------------------------------------------------------------
/ucm/shared/trans/simu/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(trans STATIC
2 |     simu_device.cc
3 |     simu_buffer.cc
4 |     simu_stream.cc
5 | )
6 | target_link_libraries(trans PUBLIC
7 |     fmt
8 | )
9 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/figs/kvcomp_single_layer_performance.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/ucm/sparse/kvcomp/figs/kvcomp_single_layer_performance.jpg


--------------------------------------------------------------------------------
/docs/source/_static/images/GSA-E2E-non-offload-throughput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/GSA-E2E-non-offload-throughput.png


--------------------------------------------------------------------------------
/docs/source/_static/images/kvcomp_single_layer_performance.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ModelEngine-Group/unified-cache-management/HEAD/docs/source/_static/images/kvcomp_single_layer_performance.jpg


--------------------------------------------------------------------------------
/test/.gitignore:
--------------------------------------------------------------------------------
 1 | reports/
 2 | dataset/
 3 | logs/
 4 | result_outputs/
 5 | results/
 6 | .cache/
 7 | backup/
 8 | sites/Demo/*
 9 | $null
10 | *__pycache__/
11 | .*
12 | *.log
13 | start.bat
14 | !.gitignore


--------------------------------------------------------------------------------
/docs/start.cmd:
--------------------------------------------------------------------------------
1 | pip install -r requirements-docs.txt
2 | start "" /wait cmd /c .\make.bat clean
3 | start "" /wait cmd /c .\make.bat html
4 | start python -m http.server -d build/html/
5 | start http://localhost:8000
6 | 


--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
 1 | sphinx
 2 | sphinx-argparse
 3 | sphinx-book-theme
 4 | sphinx-copybutton
 5 | sphinx-design
 6 | sphinx-togglebutton
 7 | myst-parser
 8 | msgspec
 9 | sphinx-substitution-extensions
10 | sphinx-intl


--------------------------------------------------------------------------------
/ucm/store/detail/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | file(GLOB_RECURSE UCM_STORE_DETAIL_SOURCE "*.*")
2 | add_library(storedetail OBJECT ${UCM_STORE_DETAIL_SOURCE})
3 | target_include_directories(storedetail PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
4 | 


--------------------------------------------------------------------------------
/test/requirements.txt:
--------------------------------------------------------------------------------
 1 | #pytest
 2 | pytest>=7.0.0
 3 | pytest-html>=3.1.1
 4 | PyYAML>=6.0
 5 | #database
 6 | peewee>=3.14.5
 7 | psycopg2-binary>=2.8
 8 | #llmperf
 9 | requests>=2.10.0
10 | pandas>=2.3.0
11 | pydantic>=2.12.0
12 | transformers>=4.0.0


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/task_status.h:
--------------------------------------------------------------------------------
1 | #ifndef KVSTAR_RETRIEVE_CLIB_TASK_STATUS_H
2 | #define KVSTAR_RETRIEVE_CLIB_TASK_STATUS_H
3 | 
4 | namespace KVStar {
5 |     enum class TaskStatus { PENDING, RUNNING, SUCCESS, FAILURE };  // the four states a task can be in; NOTE(review): allowed transitions are not defined here — confirm with the code that sets the status
6 | }
7 | 
8 | #endif //KVSTAR_RETRIEVE_CLIB_TASK_STATUS_H
9 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch==2.4.0
 2 | torchvision==0.19.0
 3 | transformers==4.45.2
 4 | flash-attn==2.6.3
 5 | accelerate==0.34.2
 6 | av==13.1.0
 7 | pyyaml==6.0.2
 8 | opencv-python-headless==4.10.0.84
 9 | pandas==2.2.3
10 | pysubs2==1.7.3
11 | pyarrow==17.0.0
12 | openai==1.56.0
13 | tqdm==4.67.1


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/scripts/infer_eval.sh:
--------------------------------------------------------------------------------
 1 | ckpt_path=$1             # forwarded as --hf_qwen2vl7b_path
 2 | config_path=$2           # forwarded as --config_path
 3 | num_gpus=$3              # forwarded as --n_gpus
 4 | frame_extraction_fps=$4  # forwarded as --video_frame_extraction_fps
 5 | 
 6 | PYTHONPATH=$PYTHONPATH:./ python retake/infer_eval.py \
 7 | --hf_qwen2vl7b_path "$ckpt_path" \
 8 | --config_path "$config_path" \
 9 | --n_gpus "$num_gpus" \
10 | --video_frame_extraction_fps "$frame_extraction_fps" \
11 | "${@:5}"


--------------------------------------------------------------------------------
/ucm/store/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(detail)
2 | add_library(storeintf INTERFACE)
3 | target_include_directories(storeintf INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
4 | target_link_libraries(storeintf INTERFACE storedetail infra_status)
5 | add_subdirectory(nfsstore)
6 | add_subdirectory(pcstore)
7 | add_subdirectory(mooncakestore)
8 | add_subdirectory(test)
9 | 


--------------------------------------------------------------------------------
/.github/workflows/matchers/mypy.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "problemMatcher": [
 3 |     {
 4 |       "owner": "mypy",
 5 |       "pattern": [
 6 |         {
 7 |           "regexp": "^(.+):(\\d+):\\s(error|warning):\\s(.+)$",
 8 |           "file": 1,
 9 |           "line": 2,
10 |           "severity": 3,
11 |           "message": 4
12 |         }
13 |       ]
14 |     }
15 |   ]
16 | }
17 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/simd_compute_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef KVSTAR_RETRIEVE_SIMD_COMPUTE_KERNEL_H
 2 | #define KVSTAR_RETRIEVE_SIMD_COMPUTE_KERNEL_H
 3 | 
 4 | #include "retrieve_task.h"
 5 | #include "task_result.h"
 6 | 
 7 | namespace KVStar {
 8 | 
 9 | void Execute(const RetrieveTask& task, TaskResult& result);  // declaration only; NOTE(review): presumably processes `task` and reports through the mutable `result` — confirm in the definition
10 | 
11 | }
12 | 
13 | 
14 | #endif //KVSTAR_RETRIEVE_SIMD_COMPUTE_KERNEL_H


--------------------------------------------------------------------------------
/.github/workflows/matchers/ruff.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "problemMatcher": [
 3 |       {
 4 |         "owner": "ruff",
 5 |         "pattern": [
 6 |           {
 7 |             "regexp": "^(.+?):(\\d+):(\\d+): (\\w+): (.+)$",
 8 |             "file": 1,
 9 |             "line": 2,
10 |             "column": 3,
11 |             "code": 4,
12 |             "message": 5
13 |           }
14 |         ]
15 |       }
16 |     ]
17 |   }
18 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # Unified Cache Manager Documentation
 2 | 
 3 | Live doc: Coming soon
 4 | 
 5 | ## Build the docs
 6 | 
 7 | ```bash
 8 | # Install dependencies.
 9 | pip install -r requirements-docs.txt
10 | 
11 | # Build the docs.
12 | make clean
13 | make html
14 | 
15 | 
16 | # Serve the built docs locally (http.server listens on port 8000 by default)
17 | python -m http.server -d build/html/
18 | ```
19 | 
20 | Launch your browser and open:
21 | - English version: http://localhost:8000


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/py_intf/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | find_library(
 2 |         TORCH_PYTHON_LIB_PATH
 3 |         torch_python
 4 |         HINTS ${TORCH_INSTALL_PREFIX}/lib
 5 | )
 6 | 
 7 | pybind11_add_module(
 8 |         kvstar_retrieve
 9 |         py_intf.cpp
10 | )
11 | 
12 | target_link_libraries(
13 |         kvstar_retrieve
14 |         PRIVATE
15 |         kvstar_retrieve.core
16 |         ${TORCH_PYTHON_LIB_PATH}
17 |         ${Torch_LIBRARIES}
18 | )


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/computation_task.h:
--------------------------------------------------------------------------------
 1 | #ifndef KVSTAR_RETRIEVE_CLIB_COMPUTATION_TASK_H
 2 | #define KVSTAR_RETRIEVE_CLIB_COMPUTATION_TASK_H
 3 | 
 4 | #include <vector>
 5 | #include <cstdint>
 6 | #include <optional>
 7 | 
 8 | namespace KVStar {
 9 | 
10 | struct PlainTensor {  // plain aggregate: an untyped data pointer plus two int64 vectors
11 |     void* data = nullptr;          // null by default; NOTE(review): ownership and element type are not expressed here — confirm with users
12 |     std::vector<int64_t> shape;    // NOTE(review): presumably one extent per dimension — confirm
13 |     std::vector<int64_t> strides;  // NOTE(review): presumably one stride per dimension; unit (elements vs. bytes) not visible here — confirm
14 | };
15 | 
16 | 
17 | }
18 | 
19 | 
20 | 
21 | #endif


--------------------------------------------------------------------------------
/ucm/store/nfsstore/device/musa/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(MUSA_ROOT "/usr/local/musa/" CACHE PATH "Path to MUSA root directory")
 2 | add_library(Musa::musart UNKNOWN IMPORTED)
 3 | set_target_properties(Musa::musart PROPERTIES
 4 |     INTERFACE_INCLUDE_DIRECTORIES "${MUSA_ROOT}/include"
 5 |     IMPORTED_LOCATION "${MUSA_ROOT}/lib/libmusart.so"
 6 | )
 7 | 
 8 | add_library(storedevice STATIC musa_device.cc)
 9 | target_link_libraries(storedevice PUBLIC infra_status Musa::musart)
10 | 


--------------------------------------------------------------------------------
/ucm/shared/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if(BUILD_UNIT_TESTS)
 2 |     include(GoogleTest)
 3 |     file(GLOB_RECURSE UCMSHARED_TEST_SOURCE_FILES "./case/*.cc")
 4 |     add_executable(ucmshared.test ${UCMSHARED_TEST_SOURCE_FILES})
 5 |     target_include_directories(ucmshared.test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/case)
 6 |     target_link_libraries(ucmshared.test PRIVATE
 7 |         trans
 8 |         gtest_main gtest
 9 |     )
10 |     gtest_discover_tests(ucmshared.test)
11 | endif()
12 | 


--------------------------------------------------------------------------------
/.github/workflows/unifiedcache_test.yml:
--------------------------------------------------------------------------------
 1 | name: 'ucm-lint-and-unittest'
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'main'
 7 |       - 'dev*'
 8 |       - '*release'
 9 |       - 'feature*'
10 |   pull_request:
11 |     branches:
12 |       - 'main'
13 |       - 'dev*'
14 |       - '*release'
15 |       - 'feature*'
16 | 
17 | jobs:
18 |   # gpu-test:
19 |   #   uses: ./.github/workflows/e2e_test.yml
20 | 
21 |   call-lint:
22 |     uses: ./.github/workflows/pre-commit.yml
23 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/device/cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(CUDA_ROOT "/usr/local/cuda/" CACHE PATH "Path to CUDA root directory")
 2 | set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/nvcc)
 3 | set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90)
 4 | enable_language(CUDA)
 5 | add_library(storedevice STATIC cuda_device.cu)
 6 | target_link_libraries(storedevice PUBLIC infra_status)
 7 | target_compile_options(storedevice PRIVATE
 8 |     --diag-suppress=128 --diag-suppress=2417 --diag-suppress=2597
 9 |     -Wall -fPIC
10 | )
11 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Set to other image if needed
 2 | FROM vllm/vllm-openai:v0.9.2
 3 | 
 4 | ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 5 | 
 6 | WORKDIR /workspace
 7 | 
 8 | # Install unified-cache-management
 9 | COPY . /workspace/unified-cache-management
10 | 
11 | RUN pip config set global.index-url ${PIP_INDEX_URL}
12 | 
13 | RUN export PLATFORM="cuda" && \
14 |      pip install -v -e /workspace/unified-cache-management --no-build-isolation
15 | 
16 | 
17 | ENTRYPOINT ["/bin/bash"]


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_vl/qwen2-vl_mlvu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | 
 7 | ### dataset
 8 | dataset_name: mlvu
 9 | anno_file: dataset/mlvu/mlvu.json
10 | dataloader_num_workers: 2
11 | 
12 | ### data
13 | sample_fps: 4
14 | max_num_frames: 256
15 | longsize_resolution: 448
16 | 
17 | ### generate
18 | do_sample: false
19 | 
20 | ### output
21 | output_dir: results/qwen2vl_7b_mlvu_f256_4fps_r448/base
22 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/device/ascend/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(ASCEND_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "Path to Ascend root directory")
 2 | add_library(Ascend::ascendcl UNKNOWN IMPORTED)
 3 | set_target_properties(Ascend::ascendcl PROPERTIES
 4 |     INTERFACE_INCLUDE_DIRECTORIES "${ASCEND_ROOT}/include"
 5 |     IMPORTED_LOCATION "${ASCEND_ROOT}/lib64/libascendcl.so"
 6 | )
 7 | 
 8 | add_library(storedevice STATIC ascend_device.cc)
 9 | target_link_libraries(storedevice PUBLIC infra_status Ascend::ascendcl)
10 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/llava_video/llava-video_mlvu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: llava_video
 3 | method: retake
 4 | attn_implementation: "flash_attention_2"
 5 | 
 6 | ### dataset
 7 | dataset_name: mlvu
 8 | anno_file: dataset/mlvu/mlvu.json
 9 | dataloader_num_workers: 4
10 | 
11 | ### data
12 | sample_fps: 2
13 | max_num_frames: 64
14 | longsize_resolution: 682 # short-side can be 384
15 | 
16 | ### generate
17 | do_sample: false
18 | 
19 | ### output
20 | output_dir: results/llava-video_mlvu_f64_2fps_r682/base
21 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_5_vl/qwen2-5-vl_videomme_f256.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_5_vl
 3 | method: retake
 4 | attn_implementation: "flash_attention_2"
 5 | 
 6 | ### dataset
 7 | dataset_name: videomme
 8 | anno_file: dataset/video_mme/video_mme.json
 9 | dataloader_num_workers: 4
10 | 
11 | ### data
12 | sample_fps: 2
13 | max_num_frames: 256
14 | longsize_resolution: 448
15 | 
16 | ### generate
17 | do_sample: false
18 | 
19 | ### output
20 | output_dir: results/qwen2_5_vl_7b_videomme_f256_2fps_r448/base
21 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_vl/qwen2-vl_lvbench.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | 
 7 | ### dataset
 8 | dataset_name: lvbench
 9 | anno_file: dataset/lvbench/lvbench.json
10 | dataloader_num_workers: 2
11 | 
12 | ### data
13 | sample_fps: 2
14 | max_num_frames: 256
15 | longsize_resolution: 448
16 | 
17 | ### generate
18 | do_sample: false
19 | 
20 | ### output
21 | output_dir: results/qwen2vl_7b_lvbench_f256_2fps_r448/base
22 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/infra/template/singleton.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_SINGLETON_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_SINGLETON_H
 3 | 
 4 | template <typename T>  // T: the type whose single shared instance is handed out by Instance()
 5 | class Singleton {
 6 | public:
 7 |     Singleton(const Singleton&) = delete;             // not copy-constructible
 8 |     Singleton& operator=(const Singleton&) = delete;  // not copy-assignable
 9 |     static T* Instance()  // returns the address of the function-local static T below
10 |     {
11 |         static T t;  // default-constructed the first time control passes through this declaration
12 |         return &t;
13 |     }
14 | 
15 | private:
16 |     Singleton() = default;  // private, so Singleton itself cannot be constructed from outside the class
17 | };
18 | 
19 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_SINGLETON_H


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/llava_video/llava-video_lvbench.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: llava_video
 3 | method: retake
 4 | attn_implementation: "flash_attention_2"
 5 | 
 6 | ### dataset
 7 | dataset_name: lvbench
 8 | anno_file: dataset/lvbench/lvbench.json
 9 | dataloader_num_workers: 4
10 | 
11 | ### data
12 | sample_fps: 2
13 | max_num_frames: 64
14 | longsize_resolution: 682 # short-side can be 384
15 | 
16 | ### generate
17 | do_sample: false
18 | 
19 | ### output
20 | output_dir: results/llava-video_lvbench_f64_2fps_r682/base
21 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_vl/qwen2-vl_videomme.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | 
 7 | ### dataset
 8 | dataset_name: videomme
 9 | anno_file: dataset/video_mme/video_mme.json
10 | dataloader_num_workers: 2
11 | 
12 | ### data
13 | sample_fps: 4
14 | max_num_frames: 256
15 | longsize_resolution: 448
16 | 
17 | ### generate
18 | do_sample: false
19 | 
20 | ### output
21 | output_dir: results/qwen2vl_7b_videomme_f256_4fps_r448/base
22 | 


--------------------------------------------------------------------------------
/.github/workflows/matchers/actionlint.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "problemMatcher": [
 3 |     {
 4 |       "owner": "actionlint",
 5 |       "pattern": [
 6 |         {
 7 |           "regexp": "^(?:\\x1b\\[\\d+m)?(.+?)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*:(?:\\x1b\\[\\d+m)*(\\d+)(?:\\x1b\\[\\d+m)*: (?:\\x1b\\[\\d+m)*(.+?)(?:\\x1b\\[\\d+m)* \\[(.+?)\\]$",
 8 |           "file": 1,
 9 |           "line": 2,
10 |           "column": 3,
11 |           "message": 4,
12 |           "code": 5
13 |         }
14 |       ]
15 |     }
16 |   ]
17 | }
18 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/llava_video/llava-video_videomme.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: llava_video
 3 | method: retake
 4 | attn_implementation: "flash_attention_2"
 5 | 
 6 | ### dataset
 7 | dataset_name: videomme
 8 | anno_file: dataset/video_mme/video_mme.json
 9 | dataloader_num_workers: 4
10 | 
11 | ### data
12 | sample_fps: 2
13 | max_num_frames: 64
14 | longsize_resolution: 682 # short-side can be 384
15 | 
16 | ### generate
17 | do_sample: false
18 | 
19 | ### output
20 | output_dir: results/llava-video_video_mme_f64_2fps_r682/base
21 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_runner.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_RUNNER_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_RUNNER_H
 3 | 
 4 | #include "status/status.h"
 5 | #include "retrieve_task.h"
 6 | #include "task_result.h"
 7 | 
 8 | 
 9 | namespace KVStar {
10 | 
11 | class RetrieveTaskRunner {
12 | public:
13 |     RetrieveTaskRunner(){}  // empty constructor; the class declares no data members here
14 |     Status Run(const RetrieveTask& task, TaskResult& result);  // declaration only; takes the task read-only and a mutable result, returns a Status
15 | };
16 | 
17 | 
18 | }
19 | 
20 | 
21 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_RUNNER_H


--------------------------------------------------------------------------------
/ucm/__init__.py:
--------------------------------------------------------------------------------
 1 | from ucm.integration.vllm.ucm_connector import UCMConnector  # NOTE(review): this import is outside the try below, so a failure here is NOT caught
 2 | 
 3 | try:  # best-effort: any exception raised in this block is downgraded to a warning
 4 |     from ucm.integration.vllm.patch.apply_patch import ensure_patches_applied
 5 | 
 6 |     ensure_patches_applied()
 7 | except Exception as e:
 8 |     # Don't fail if the patches can't be applied - we might be running in an environment without vLLM.
 9 |     import warnings
10 | 
11 |     warnings.warn(
12 |         f"Failed to apply vLLM patches: {e}. "
13 |         f"If you're using vLLM, ensure it's installed and patches are compatible."
14 |     )
15 | 
16 | __all__ = ["UCMConnector"]  # the only name listed for `from ucm import *`
17 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/ascend/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(ASCEND_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "Path to Ascend root directory")
 2 | add_library(Ascend::ascendcl UNKNOWN IMPORTED)
 3 | set_target_properties(Ascend::ascendcl PROPERTIES
 4 |     INTERFACE_INCLUDE_DIRECTORIES "${ASCEND_ROOT}/include"
 5 |     IMPORTED_LOCATION "${ASCEND_ROOT}/lib64/libascendcl.so"
 6 | )
 7 | add_library(trans STATIC
 8 |     ascend_device.cc
 9 |     ascend_buffer.cc
10 |     ascend_stream.cc
11 | )
12 | target_link_libraries(trans PUBLIC
13 |     fmt
14 |     Ascend::ascendcl
15 | )
16 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | file(GLOB_RECURSE CORE_SRC_FILES "*.cpp" "api/*.cpp" "api/**/*.cpp" "domain/*.cpp" "domain/**/*.cpp" "infra/*.cpp" "infra/**/*.cpp")
 2 | 
 3 | add_library(kvstar_retrieve.core STATIC ${CORE_SRC_FILES})
 4 | 
 5 | target_include_directories(kvstar_retrieve.core PUBLIC
 6 |         "." "api" "domain" "infra"
 7 |         ${NUMA_INSTALL_DIR}/include
 8 | )
 9 | 
10 | target_link_libraries(kvstar_retrieve.core PUBLIC
11 |         spdlog::spdlog
12 |         fmt::fmt
13 |         $<$<BOOL:${BUILD_NUMA}>:${NUMA_INSTALL_DIR}/lib/libnuma.so>
14 |         ${Torch_LIBRARIES}
15 | )
16 | 


--------------------------------------------------------------------------------
/docs/source/about.md:
--------------------------------------------------------------------------------
1 | # About Us
2 | 
3 | UCM is rooted in KV Cache, with the goal of reducing inference costs and building commercially viable inference
4 | solutions. It enhances throughput through methods such as Prefix Cache, sparsification, and PD Disaggregation.
5 | 
6 | The UCM team consists of a group of "lazy" people who love simple things and also enjoy "borrowing" the excellent
7 | experiences of others. Adhering to the principle of full openness, we hope everyone will generously share their
8 | insights. We also welcome everyone to learn from these experiences together, engage in discussions, and help us make
9 | progress.


--------------------------------------------------------------------------------
/docker/Dockerfile-NPU:
--------------------------------------------------------------------------------
 1 | # Set to other image if needed
 2 | FROM quay.io/ascend/vllm-ascend:v0.9.2rc1
 3 | 
 4 | ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 5 | 
 6 | WORKDIR /workspace
 7 | 
 8 | # Install unified-cache-management
 9 | COPY . /workspace/unified-cache-management
10 | 
11 | RUN pip config set global.index-url ${PIP_INDEX_URL}
12 | 
13 | RUN export PLATFORM="ascend" && \
14 |     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
15 |     pip install -v -e /workspace/unified-cache-management --no-build-isolation
16 | 
17 | CMD ["/bin/bash"]


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/task_result.h:
--------------------------------------------------------------------------------
 1 | #ifndef KVSTAR_RETRIEVE_CLIB_TASK_RESULT_H
 2 | #define KVSTAR_RETRIEVE_CLIB_TASK_RESULT_H
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | #include <atomic>
 7 | #include <mutex>
 8 | #include <cstdint>
 9 | #include "domain/retrieve_task/task_status.h"
10 | 
11 | 
12 | namespace KVStar {
13 | struct TaskResult {
14 |     std::atomic<TaskStatus> status{TaskStatus::PENDING};  // atomic status, initialised to PENDING
15 |     std::vector<int64_t> topkIndices;  // NOTE(review): presumably the selected top-k indices — confirm with the producer
16 |     std::string errorMessage;          // NOTE(review): presumably filled in on failure — confirm with the producer
17 |     std::mutex mtx;                    // NOTE(review): presumably guards the non-atomic members above — confirm with users
18 |     TaskResult() = default;
19 | };
20 | 
21 | } // namespace KVStar
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/infra/memory/memory.cpp:
--------------------------------------------------------------------------------
 1 | #include "memory.h"
 2 | #include <cstdlib>
 3 | 
 4 | namespace KVStar {
 5 | 
 6 | std::shared_ptr<void> MakePtr(void *ptr) {  // wraps a raw pointer in a shared_ptr whose deleter calls free()
 7 |     if (!ptr) { return nullptr; }  // null input yields an empty shared_ptr
 8 |     return std::shared_ptr<void>(ptr, [](void *ptr) { free(ptr); });  // the lambda parameter shadows the outer `ptr`
 9 | }
10 | 
11 | std::shared_ptr<void> Memory::Alloc(const size_t size) { return MakePtr(malloc(size)); }  // malloc(size) handed to MakePtr; empty shared_ptr if malloc returns null
12 | 
13 | std::shared_ptr<void> Memory::AllocAlign(const size_t size) {  // allocates `size` bytes aligned to _alignment
14 |     void *ptr = nullptr;
15 |     auto ret = posix_memalign(&ptr, _alignment, size);  // posix_memalign returns 0 on success
16 |     if (ret != 0) { return nullptr; }  // allocation failed: return an empty shared_ptr
17 |     return MakePtr(ptr);  // free() is the matching release for posix_memalign memory
18 | }
19 | }


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/environment_npu.yaml:
--------------------------------------------------------------------------------
 1 | name: retake
 2 | channels:
 3 |   - defaults
 4 | dependencies:
 5 |   - python==3.11
 6 |   - pip:
 7 |       - numpy==1.26.4
 8 |       - scipy==1.14.1
 9 |       - torch==2.4.0
10 |       - torch-npu==2.4.0
11 |       - torchvision==0.19.0
12 |       - transformers==4.45.2
13 |       - accelerate==0.34.2
14 |       - av==13.1.0
15 |       - pyyaml==6.0.2
16 |       - opencv-python-headless==4.10.0.84
17 |       - pandas==2.2.3
18 |       - pysubs2==1.7.3
19 |       - pyarrow==17.0.0
20 |       - openai==1.56.0
21 |       - tqdm==4.67.1
22 |       - attrs==23.2.0
23 |       - decorator==5.2.1
24 | 


--------------------------------------------------------------------------------
/ucm/sparse/esa/retrieval/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Add the build target
 2 | pybind11_add_module(retrieval_backend cpy/retrieval_backend.cpp)
 3 | 
 4 | # Set the install directory of the output library
 5 | file(RELATIVE_PATH INSTALL_REL_PATH
 6 |      ${CMAKE_SOURCE_DIR}
 7 |      ${CMAKE_CURRENT_SOURCE_DIR}
 8 | )
 9 | install(TARGETS retrieval_backend LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)
10 | 
11 | # Set the include directories so that numaf.h can be found
12 | target_include_directories(retrieval_backend PUBLIC
13 |     ${NUMA_INSTALL_DIR}/include
14 |     ${Torch_INCLUDE_DIRS}
15 | )
16 | 
17 | # Link the required libraries
18 | target_link_libraries(retrieval_backend PUBLIC
19 |     $<$<BOOL:${BUILD_NUMA}>:${NUMA_INSTALL_DIR}/lib/libnuma.so>
20 |     ${Torch_LIBRARIES}
21 | )
22 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/hash_retrieval/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Add the build target
 2 | pybind11_add_module(hash_retrieval_backend cpy/hash_retrieval_backend.cpp)
 3 | 
 4 | file(RELATIVE_PATH INSTALL_REL_PATH
 5 |      ${CMAKE_SOURCE_DIR}
 6 |      ${CMAKE_CURRENT_SOURCE_DIR}
 7 | )
 8 | install(TARGETS hash_retrieval_backend LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)
 9 | 
10 | # Set the include directories so that numaf.h can be found
11 | target_include_directories(hash_retrieval_backend PUBLIC
12 |     ${NUMA_INSTALL_DIR}/include
13 |     ${Torch_INCLUDE_DIRS}
14 | )
15 | 
16 | # Link the required libraries
17 | target_link_libraries(hash_retrieval_backend PUBLIC
18 |     $<$<BOOL:${BUILD_NUMA}>:${NUMA_INSTALL_DIR}/lib/libnuma.so>
19 |     ${Torch_LIBRARIES}
20 | )


--------------------------------------------------------------------------------
/test/pytest.ini:
--------------------------------------------------------------------------------
 1 | [pytest]
 2 | testpaths = suites
 3 | python_files = test_*.py
 4 | python_classes = Test*
 5 | python_functions = test_*
 6 | 
 7 | addopts =
 8 |     -ra
 9 |     --capture=no
10 | filterwarnings =
11 |     ignore::pytest.PytestReturnNotNoneWarning
12 | 
13 | log_cli = 1
14 | log_cli_level = INFO
15 | log_cli_format = [%(levelname)s] %(name)s: %(message)s
16 | norecursedirs = .git venv env __pycache__ *.egg
17 | 
18 | markers =
19 |     # -------- Levels (Required) --------
20 |     stage: Unit/Smoke/Regression/Release (0=Unit 1=Smoke 2=Regression 3=Release)
21 |     # -------- Features (Recommended) --------
22 |     feature:     Feature tag
23 |     platform: Platform tag(gpu/npu)
24 | # end of markers


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version, and other tools you might need
 8 | build:
 9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "3.12"
12 | 
13 | # Build documentation in the "docs/" directory with Sphinx
14 | sphinx:
15 |    configuration: docs/source/conf.py
16 | 
17 | # Optionally, but recommended,
18 | # declare the Python requirements required to build your documentation
19 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
20 | python:
21 |    install:
22 |    - requirements: docs/requirements-docs.txt
23 |         


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/infra/memory/memory.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_MEMORY_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_MEMORY_H
 3 | 
 4 | #include <memory>
 5 | #include <cstddef>
 6 | 
 7 | namespace KVStar {
 8 | 
 9 | class Memory {  // static-only helpers for size alignment and allocation
10 | public:
11 |     static bool Aligned(const size_t size) { return size % _alignment == 0;}  // true when size is a multiple of _alignment
12 |     static size_t Align(const size_t size) { return (size + _alignment - 1) / _alignment * _alignment; }  // rounds size up to the next multiple of _alignment
13 |     static std::shared_ptr<void> Alloc(const size_t size);  // declared here, defined out of line
14 |     static std::shared_ptr<void> AllocAlign(const size_t size);  // declared here, defined out of line
15 | 
16 | private:
17 |     static constexpr size_t _alignment{4096};  // alignment granularity used by Aligned/Align
18 | };
19 | }
20 | 
21 | 
22 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_MEMORY_H


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/100-documentation.yml:
--------------------------------------------------------------------------------
 1 | name: 📚 Documentation
 2 | description: Report an issue related to ucm official website
 3 | title: "[Doc]: "
 4 | labels: ["documentation"]
 5 | 
 6 | body:
 7 | - type: textarea
 8 |   attributes:
 9 |     label: 📚 The doc issue
10 |     description: >
11 |       A clear and concise description of what content in unifiedcache official website is an issue.
12 |   validations:
13 |     required: true
14 | - type: textarea
15 |   attributes:
16 |     label: Suggest a potential alternative/fix
17 |     description: >
18 |       Tell us how we could improve the documentation in this regard.
19 | - type: markdown
20 |   attributes:
21 |     value: >
22 |       Thanks for contributing 🎉!
23 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/ucm/store/pcstore/CMakeLists.txt:
--------------------------------------------------------------------------------
# Static library built from every .cc file under ./cc; its api/ and domain/
# directories are exported as public include paths.
file(GLOB_RECURSE UCMSTORE_PC_CC_SOURCE_FILES "./cc/*.cc")
add_library(pcstore STATIC ${UCMSTORE_PC_CC_SOURCE_FILES})
target_include_directories(pcstore PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/cc/api
    ${CMAKE_CURRENT_SOURCE_DIR}/cc/domain
)
target_link_libraries(pcstore PUBLIC storeintf trans infra_logger)

# Python extension module built from ./cpy and linked against the static library.
file(GLOB_RECURSE UCMSTORE_PC_CPY_SOURCE_FILES "./cpy/*.cc")
pybind11_add_module(ucmpcstore ${UCMSTORE_PC_CPY_SOURCE_FILES})
target_link_libraries(ucmpcstore PRIVATE pcstore)

# Install the module at the same path, relative to the source root, as this directory.
file(RELATIVE_PATH INSTALL_REL_PATH
     ${CMAKE_SOURCE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}
)
install(TARGETS ucmpcstore LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)
18 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/codespell-project/codespell
 3 |     rev: v2.4.1
 4 |     hooks:
 5 |       - id: codespell
 6 |         args: [
 7 |           '--skip', 'ucm/csrc/**,./ucm.egg-info/**,.github/**',
 8 |           '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn'
 9 |         ]
10 |   - repo: https://github.com/psf/black
11 |     rev: 24.4.2
12 |     hooks:
13 |       - id: black
14 |         language_version: python3
15 |   - repo: https://github.com/PyCQA/isort
16 |     rev: 6.0.1
17 |     hooks:
18 |       - id: isort
19 |         args:
20 |             - "--profile=black"
21 |   - repo: https://github.com/rhysd/actionlint
22 |     rev: v1.7.7
23 |     hooks:
24 |       - id: actionlint
25 | default_stages:
26 |   - pre-commit
27 |   - manual


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = [
 3 |     "setuptools>=64",
 4 |     "cmake>=3.18",
 5 |     "wheel",
 6 | ]
 7 | build-backend = "setuptools.build_meta"
 8 | 
 9 | [project]
10 | name = "uc-manager"
11 | authors = [{name = "Unified Cache Team"}]
12 | license = { file="LICENSE" }
13 | readme = "README.md"
14 | description = "Persist and reuse KV Cache to speedup your LLM."
15 | requires-python = ">=3.10"
16 | dynamic = [ "version", "dependencies", "optional-dependencies"]
17 | 
18 | [project.urls]
19 | Homepage="https://github.com/ModelEngine-Group/unified-cache-management"
20 | Documentation="https://ucm.readthedocs.io/en/latest"
21 | WeChat="https://github.com/ModelEngine-Group/unified-cache-management/blob/develop/docs/source/_static/images/qrcode_for_wechat.png?raw=true"
22 | 


--------------------------------------------------------------------------------
/test/common/llmperf/utils/common_metrics.py:
--------------------------------------------------------------------------------
# String constants naming the metric and error fields shared by the llmperf utilities.
# NOTE(review): the "_s" suffix on the latency keys presumably means seconds -- confirm
# against the code that produces these values.
# TODO (Avnishn): compute metrics in class
INTER_TOKEN_LAT = "inter_token_latency_s"
TTFT = "ttft_s"
E2E_LAT = "end_to_end_latency_s"
NUM_INPUT_TOKENS = "number_input_tokens"
NUM_OUTPUT_TOKENS = "number_output_tokens"
NUM_TOTAL_TOKENS = "number_total_tokens"
REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s"
ERROR_MSG = "error_msg"
ERROR_CODE = "error_code"
ERROR_CODE_FREQ = "error_code_frequency"
NUM_ERRORS = "number_errors"
OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s"
NUM_COMPLETED_REQUESTS = "num_completed_requests"
COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min"
ERROR_RATE = "error_rate"
NUM_REQ_STARTED = "num_requests_started"
18 | 


--------------------------------------------------------------------------------
/format.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | 
 4 | check_command() {
 5 |     if ! command -v "$1" &> /dev/null; then
 6 |         echo "❓❓$1 is not installed, please run:"
 7 |         echo "# Install lint deps"
 8 |         echo "pip install -r requirements-lint.txt"
 9 |         echo "# (optional) Enable git commit pre check"
10 |         echo "pre-commit install"
11 |         echo ""
12 |         echo "See step by step contribution guide:"
13 |         echo "Unifiedcache Official Website"
14 |         exit 1
15 |     fi
16 | }
17 | 
# pre-commit must be installed before any hook can run.
check_command pre-commit

# TODO: cleanup SC exclude
export SHELLCHECK_OPTS="--exclude=SC2046,SC2006,SC2086"

# "ci" as the first argument selects the manual hook stage; anything else
# (including no argument) runs the default stage.
case "$1" in
    ci)
        pre-commit run --all-files --hook-stage manual
        ;;
    *)
        pre-commit run --all-files
        ;;
esac


--------------------------------------------------------------------------------
/ucm/shared/trans/cuda/CMakeLists.txt:
--------------------------------------------------------------------------------
# CUDA toolkit location; override with -DCUDA_ROOT=... when it is not under /usr/local/cuda.
set(CUDA_ROOT "/usr/local/cuda/" CACHE PATH "Path to CUDA root directory")
# The compiler and target architectures are set before the CUDA language is enabled.
set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/nvcc)
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90)
enable_language(CUDA)
# The kernel is compiled as an object library and linked into `trans` below.
add_library(kernel OBJECT cuda_sm_kernel.cu)
# Diagnostics 128, 2417 and 2597 are suppressed for the kernel translation unit.
target_compile_options(kernel PRIVATE
    --diag-suppress=128 --diag-suppress=2417 --diag-suppress=2597
    -Wall -fPIC
)
# CUDA implementation of the `trans` target.
add_library(trans STATIC
    cuda_device.cc
    cuda_buffer.cc
    cuda_stream.cc
    cuda_sm_stream.cc
)
target_include_directories(trans PUBLIC ${CUDA_ROOT}/include)
target_link_directories(trans PUBLIC ${CUDA_ROOT}/lib64)
target_link_libraries(trans PUBLIC
    fmt
    cudart
    nvidia-ml
    kernel
)
24 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/CMakeLists.txt:
--------------------------------------------------------------------------------
# The device subdirectory provides the `storedevice` target linked below.
add_subdirectory(device)
# Static library built from every .cc file under ./cc.
file(GLOB_RECURSE UCMSTORE_NFS_CC_SOURCE_FILES "./cc/*.cc")
add_library(nfsstore STATIC ${UCMSTORE_NFS_CC_SOURCE_FILES})
target_include_directories(nfsstore PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/cc/api
    ${CMAKE_CURRENT_SOURCE_DIR}/cc/domain
)
target_link_libraries(nfsstore PUBLIC storeintf storedevice infra_logger)

# Python extension module built from ./cpy and linked against the static library.
file(GLOB_RECURSE UCMSTORE_NFS_CPY_SOURCE_FILES "./cpy/*.cc")
pybind11_add_module(ucmnfsstore ${UCMSTORE_NFS_CPY_SOURCE_FILES})
target_link_libraries(ucmnfsstore PRIVATE nfsstore)

# Install the module at the same path, relative to the source root, as this directory.
file(RELATIVE_PATH INSTALL_REL_PATH
     ${CMAKE_SOURCE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}
)
install(TARGETS ucmnfsstore LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)
20 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/demo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | method: retake
 3 | scaling_factor: 4
 4 | attn_implementation: "flash_attention_2"
 5 | longvideo_kwargs: {
 6 |   'frame_chunk_size': 64,
 7 |   'chunked_prefill_frames': 32,
 8 |   # KVCache compression
 9 |   'kvcache_compression': True,
10 |   'kvcache_compression_kwargs': {
11 |     'compression_method': 'stdvidlkv',
12 |     'dynamic_compression_ratio': True,
13 |     'prompt_guided_compression': True,
14 |     'pos_embed_reforge': False,
15 |     'max_input_length': 16000,
16 |     # Temporal
17 |     'enable_temporal_adaptation': True,
18 |     'temporal_adaptation_ratio': 4,
19 |     # Layer
20 |     'budget_allocation_method': 'adakv',
21 |   },
22 | }
23 | 
24 | ### data
25 | sample_fps: 4
26 | max_num_frames: 2048
27 | longsize_resolution: 448
28 | 
29 | ### generate
30 | do_sample: false


--------------------------------------------------------------------------------
/ucm/store/nfsstore/device/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if(RUNTIME_ENVIRONMENT STREQUAL "ascend")
 2 |     add_subdirectory(ascend)
 3 | elseif(RUNTIME_ENVIRONMENT STREQUAL "musa")
 4 |     add_subdirectory(musa)
 5 | elseif(RUNTIME_ENVIRONMENT STREQUAL "maca")
 6 |     add_subdirectory(maca)
 7 | elseif(RUNTIME_ENVIRONMENT STREQUAL "cuda")
 8 |     add_subdirectory(cuda)
 9 | elseif(RUNTIME_ENVIRONMENT STREQUAL "simu")
10 |     add_subdirectory(simu)
11 | else()
12 |     message(FATAL_ERROR "RUNTIME_ENVIRONMENT must be one of: ascend, musa, cuda, simu. Current value: ${RUNTIME_ENVIRONMENT}")
13 | endif()
14 | 
15 | if(TARGET storedevice)
16 |     target_include_directories(storedevice PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
17 | else()
18 |     message(FATAL_ERROR "storedevice target was not created. Check RUNTIME_ENVIRONMENT setting and subdirectory CMakeLists.txt files.")
19 | endif()
20 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/device/maca/CMakeLists.txt:
--------------------------------------------------------------------------------
# Root of the MACA cu-bridge toolchain; override with -DCUDA_ROOT=... if installed elsewhere.
set(CUDA_ROOT "/opt/maca/tools/cu-bridge" CACHE PATH "Path to WCUDA root directory")
# cucc from the cu-bridge toolchain is used as the CUDA compiler.
set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/cucc)
list(APPEND CMAKE_MODULE_PATH "${CUDA_ROOT}/cmake_module/maca")
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90)
enable_language(CUDA)

add_library(storedevice STATIC maca_device.cu)

# Imported target wrapping the cu-bridge runtime library and the MACA headers.
add_library(WCUDA::cudart UNKNOWN IMPORTED)
set_target_properties(WCUDA::cudart PROPERTIES
	INTERFACE_INCLUDE_DIRECTORIES "${CUDA_ROOT}/include"
	IMPORTED_LOCATION "${CUDA_ROOT}/lib/libcuda.so"
)
target_include_directories(WCUDA::cudart INTERFACE
	/opt/maca/include
	/opt/maca/include/mcr
)

target_link_libraries(storedevice PUBLIC infra_status WCUDA::cudart)
target_compile_options(storedevice PRIVATE -Wall -fPIC -std=c++17)
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Linker files
15 | *.ilk
16 | 
17 | # Debugger Files
18 | *.pdb
19 | 
20 | # Compiled Dynamic libraries
21 | *.so
22 | *.dylib
23 | *.dll
24 | 
25 | # Fortran module files
26 | *.mod
27 | *.smod
28 | 
29 | # Compiled Static libraries
30 | *.lai
31 | *.la
32 | *.a
33 | *.lib
34 | 
35 | # Executables
36 | *.exe
37 | *.out
38 | *.app
39 | 
40 | # Debug information files
41 | *.dwo
42 | 
43 | # Development environment files
44 | *.code-workspace
45 | .vscode/**
46 | .idea/**
47 | .git/**
48 | **/build/**
49 | **/output/**
50 | .venv/**
51 | **/__pycache__/**
52 | *.egg-info/**
53 | reports/
54 | dataset/
55 | logs/
56 | .*
57 | *.log
58 | result_outputs/
59 | results/
60 | .cache/
61 | backup/
62 | $null
63 | *__pycache__/


--------------------------------------------------------------------------------
/ucm/shared/trans/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if(RUNTIME_ENVIRONMENT STREQUAL "ascend")
 2 |     add_subdirectory(ascend)
 3 | endif()
 4 | if(RUNTIME_ENVIRONMENT STREQUAL "maca")
 5 |     add_subdirectory(maca)
 6 | endif()
 7 | if(RUNTIME_ENVIRONMENT STREQUAL "cuda")
 8 |     add_subdirectory(cuda)
 9 | endif()
10 | if(RUNTIME_ENVIRONMENT STREQUAL "simu")
11 |     add_subdirectory(simu)
12 | endif()
13 | target_include_directories(trans PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/..)
14 | target_link_libraries(trans PUBLIC infra_status)
15 | 
16 | file(GLOB_RECURSE UCMTRANS_CPY_SOURCE_FILES "./cpy/*.cc")
17 | pybind11_add_module(ucmtrans ${UCMTRANS_CPY_SOURCE_FILES})
18 | target_link_libraries(ucmtrans PRIVATE trans)
19 | 
20 | file(RELATIVE_PATH INSTALL_REL_PATH
21 |      ${CMAKE_SOURCE_DIR}
22 |      ${CMAKE_CURRENT_SOURCE_DIR}
23 | )
24 | install(TARGETS ucmtrans LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/800-others.yml:
--------------------------------------------------------------------------------
 1 | name: 🎲 Others
 2 | description: Submit a discussion as you like. Note that developers are heavily overloaded and we mainly rely on community users to answer these issues.
 3 | title: "[Misc]: "
 4 | labels: ["misc"]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc).
11 | - type: textarea
12 |   attributes:
13 |     label: Anything you want to discuss about ucm.
14 |     description: >
15 |       Anything you want to discuss about unifiedcache.
16 |   validations:
17 |     required: true
18 | - type: markdown
19 |   attributes:
20 |     value: >
21 |       Thanks for contributing 🎉!
22 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/infra/logger/logger.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_LOGGER_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_LOGGER_H
 3 | 
 4 | #include <spdlog/spdlog.h>
 5 | 
 6 | namespace KVStar {
 7 | 
/// Access point for the logger used by the KVSTAR_* logging macros.
class Logger {
public:
    /// Returns the spdlog logger to write to; defined out of line.
    static std::shared_ptr<spdlog::logger> Make();
};
12 | 
13 | }
14 | 
// Logs through KVStar::Logger::Make() at `level`, attaching the call site's file, line and
// function as the spdlog source location. The remaining arguments are passed on to
// logger->log() unchanged.
#define KVSTAR_LOG(level, ...)                                                                                             \
KVStar::Logger::Make()->log(spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, level, __VA_ARGS__)
// Per-level shorthands for KVSTAR_LOG.
#define KVSTAR_DEBUG(...) KVSTAR_LOG(spdlog::level::debug, __VA_ARGS__)
#define KVSTAR_INFO(...) KVSTAR_LOG(spdlog::level::info, __VA_ARGS__)
#define KVSTAR_WARN(...) KVSTAR_LOG(spdlog::level::warn, __VA_ARGS__)
#define KVSTAR_ERROR(...) KVSTAR_LOG(spdlog::level::err, __VA_ARGS__)
21 | 
22 | 
23 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_LOGGER_H


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/demo_npu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | method: retake
 3 | scaling_factor: 4
 4 | # attn_implementation: "sdpa"
 5 | attn_implementation: "eager" # If your NPU does not support sdpa attention
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 16,
 8 |   'chunked_prefill_frames': 16,
 9 |   # KVCache compression
10 |   'kvcache_compression': True,
11 |   'kvcache_compression_kwargs': {
12 |     'compression_method': 'stdvidlkv',
13 |     'dynamic_compression_ratio': True,
14 |     'prompt_guided_compression': True,
15 |     'pos_embed_reforge': False,
16 |     'max_input_length': 16000,
17 |     # Temporal
18 |     'enable_temporal_adaptation': True,
19 |     'temporal_adaptation_ratio': 4,
20 |     # Layer
21 |     'budget_allocation_method': 'adakv',
22 |   },
23 | }
24 | 
25 | ### data
26 | sample_fps: 4
27 | max_num_frames: 2048
28 | longsize_resolution: 448
29 | 
30 | ### generate
31 | do_sample: false


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/infra/thread/latch.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_LATCH_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_LATCH_H
 3 | 
 4 | #include <atomic>
 5 | #include <condition_variable>
 6 | #include <mutex>
 7 | 
 8 | namespace KVStar {
 9 | class Latch {
10 | public:
11 |     explicit Latch(const size_t expected = 0) : _counter{expected} {}
12 |     void Up() { ++this->_counter; }
13 |     size_t Done() { return --this->_counter; }
14 |     void Notify() { this->_cv.notify_all(); }
15 |     void Wait()
16 |     {
17 |         std::unique_lock<std::mutex> lk(this->_mutex);
18 |         if (this->_counter == 0) { return; }
19 |         this->_cv.wait(lk, [this] { return this->_counter == 0; });
20 |     }
21 | 
22 | private:
23 |     std::mutex _mutex;
24 |     std::condition_variable _cv;
25 |     std::atomic<size_t> _counter;
26 | };
27 | 
28 | }
29 | 
30 | 
31 | 
32 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_LATCH_H


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_vl/retake_qwen2-vl_videomme.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 128,
 8 |   'chunked_prefill_frames': 32,
 9 |   # KVCache compression
10 |   'kvcache_compression': True,
11 |   'kvcache_compression_kwargs': {
12 |     'dynamic_compression_ratio': True,
13 |     'compression_method': 'pivotkv',
14 |     'pos_embed_reforge': True,
15 |     'max_input_length': 32000
16 |   },
17 | }
18 | 
19 | 
20 | ### dataset
21 | dataset_name: videomme
22 | anno_file: dataset/video_mme/video_mme.json
23 | dataloader_num_workers: 2
24 | 
25 | ### data
26 | sample_fps: 4
27 | max_num_frames: 2048
28 | longsize_resolution: 448
29 | 
30 | ### generate
31 | do_sample: false
32 | 
33 | ### output
34 | output_dir: results/qwen2vl_7b_video_mme_f2048_4fps_r448/retake_pivot-32k
35 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |     workflow_call:
 5 | 
 6 | permissions:
 7 |   contents: read
 8 | 
 9 | jobs:
10 |   pre-commit:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |     - name: Checkout repository
14 |       uses: actions/checkout@v4
15 | 
16 |     - name: Set up Python
17 |       uses: actions/setup-python@v5
18 |       with:
19 |         python-version: "3.12"
20 | 
21 |     - name: Add matchers for better error display
22 |       run: |
23 |         echo "::add-matcher::.github/workflows/matchers/actionlint.json"
24 |         echo "::add-matcher::.github/workflows/matchers/mypy.json"
25 | 
26 |     - name: Run pre-commit checks on all files
27 |       uses: pre-commit/action@v3.0.1
28 |       env:
29 |         SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
30 |       with:
31 |         extra_args: --all-files --hook-stage manual
32 | 


--------------------------------------------------------------------------------
/docs/source/developer-guide/contribute.md:
--------------------------------------------------------------------------------
 1 | # How to contribute
 2 | ## Building and testing
 3 | It’s recommended to set up a local development environment to build and test before you submit a PR.
 4 | ### Run lint locally
Run the following commands to format your code before submitting:
 6 | ```bash
 7 | # Choose a base dir (~/vllm-project/) and set up venv
 8 | cd ~/vllm-project/
 9 | python3 -m venv .venv
10 | source ./.venv/bin/activate
11 | 
12 | # Clone UCM and install
13 | git clone https://github.com/ModelEngine-Group/unified-cache-management.git 
14 | cd unified-cache-management
15 | 
16 | # Install lint requirement and enable pre-commit hook
17 | pip install -r requirements-lint.txt
18 | 
# Run lint (the first run needs network access, e.g. via a proxy, to install the pre-commit hook dependencies)
20 | bash format.sh
21 | ```
22 | ### Run unit test locally
Run the unit tests locally with the following command:
24 | ```bash
25 | python3 -m unittest discover -s test
26 | ```
27 | 


--------------------------------------------------------------------------------
/test/common/llmperf/utils/models.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, Optional, Tuple
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | 
class RequestConfig(BaseModel):
    """The configuration for a request to the LLM API.

    Args:
        model: The model to use.
        prompt: The prompt to provide to the LLM API, given as a ``(str, int)`` tuple
            (presumably the prompt text and its token count -- TODO confirm).
        sampling_params: Additional sampling parameters to send with the request.
            For more information see the Router app's documentation for the completions
        llm_api: The name of the LLM API to send the request to.
        metadata: Additional metadata to attach to the request for logging or validation purposes.
        openai_api_base: Optional string that defaults to ``""`` (presumably the base URL
            of an OpenAI-compatible endpoint -- TODO confirm against the clients).
    """

    model: str
    prompt: Tuple[str, int]
    sampling_params: Optional[Dict[str, Any]] = None
    llm_api: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    openai_api_base: Optional[str] = ""
24 | 


--------------------------------------------------------------------------------
/.github/workflows/cpp-linter.yml:
--------------------------------------------------------------------------------
 1 | name: cpp-linter
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "*" ]
 6 |   pull_request:
 7 |     branches: [ "dev*", "main", "*release", "feature*" ]
 8 | 
 9 | 
10 | jobs:
11 |   cpp-linter:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3  # v6.0.0
15 |         with:
16 |           persist-credentials: false
17 |       - uses: cpp-linter/cpp-linter-action@main
18 |         id: linter
19 |         continue-on-error: true
20 |         env:
21 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22 |         with:
23 |           style: file
24 |           tidy-checks: '-*'
25 |           files-changed-only: true
26 |           lines-changed-only: diff
27 |           format-review: true
28 |           version: 20
29 | 
30 |       - name: Fail fast?!
31 |         if: steps.linter.outputs.checks-failed != 0
32 |         run: |
33 |           echo "some linter checks failed. ${{ steps.linter.outputs.checks-failed }}"
34 |           exit 1
35 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_waiter.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_WAITER_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_WAITER_H
 3 | 
 4 | #include <spdlog/fmt/fmt.h>
 5 | #include <spdlog/stopwatch.h>
 6 | #include "logger/logger.h"
 7 | #include "thread/latch.h"
 8 | 
 9 | namespace KVStar {
10 | 
11 | class RetrieveTaskWaiter : public Latch {
12 | public:
13 |     RetrieveTaskWaiter(const size_t taskId, const size_t waitCounter)
14 |         : Latch{waitCounter}, _taskId{taskId}, _waitCounter{waitCounter}
15 |     {
16 |     }
17 | 
18 |     void Done()
19 |     {
20 |         if (Latch::Done() == 0) {
21 |             KVSTAR_DEBUG("Task({}, {}) finished, elapsed {:.06f}s", this->_taskId, this->_waitCounter, this->_sw.elapsed().count());
22 |             this->Notify();
23 |         }
24 |     }
25 | 
26 | private:
27 |     size_t _taskId;
28 |     size_t _waitCounter;
29 |     spdlog::stopwatch _sw;
30 | 
31 | };
32 | 
33 | }
34 | 
35 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_WAITER_H
36 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | BasedOnStyle: Google
 2 | IndentWidth: 4
 3 | ColumnLimit: 100
 4 | AccessModifierOffset: -4
 5 | AlwaysBreakTemplateDeclarations: true
 6 | PointerAlignment: Left
 7 | AlignArrayOfStructures: Left
 8 | AllowShortBlocksOnASingleLine: true
 9 | AllowShortCaseLabelsOnASingleLine: true
10 | AllowShortFunctionsOnASingleLine: All
11 | AllowShortIfStatementsOnASingleLine: true
12 | AllowShortLoopsOnASingleLine: true
13 | IncludeBlocks: Merge
14 | IncludeCategories:
15 |     - Regex: '<.*>'
16 |       Priority: 2
17 |     - Regex: '.*'
18 |       Priority: 3
19 | BreakBeforeBraces: Custom
20 | BraceWrapping:
21 |     AfterClass: false
22 |     AfterControlStatement: false
23 |     AfterEnum: false
24 |     AfterFunction: true
25 |     AfterNamespace: false
26 |     AfterObjCDeclaration: false
27 |     AfterStruct: false
28 |     AfterUnion: false
29 |     AfterExternBlock: false
30 |     BeforeCatch: false
31 |     BeforeElse: false
32 |     IndentBraces: false
33 |     SplitEmptyFunction: true
34 |     SplitEmptyRecord: true
35 |     SplitEmptyNamespace: true
36 | 


--------------------------------------------------------------------------------
/ucm/shared/metrics/CMakeLists.txt:
--------------------------------------------------------------------------------
# Core sources; CONFIGURE_DEPENDS re-checks the glob at build time.
file(GLOB_RECURSE CORE_SRCS CONFIGURE_DEPENDS
     "${CMAKE_CURRENT_SOURCE_DIR}/cc/stats/*.cc"
     "${CMAKE_CURRENT_SOURCE_DIR}/cc/*.cc")
add_library(monitor_static STATIC ${CORE_SRCS})
# Position-independent code is required because the archive is linked into the Python module.
set_property(TARGET monitor_static PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(monitor_static PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cc>
    $<INSTALL_INTERFACE:include>)
# The archive is emitted as libmonitor rather than libmonitor_static.
set_target_properties(monitor_static PROPERTIES OUTPUT_NAME monitor)

file(GLOB_RECURSE BINDINGS_SRCS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/cpy/*.cc")
pybind11_add_module(ucmmonitor ${BINDINGS_SRCS})
# --whole-archive keeps every object of the static library in the module, including
# ones nothing references directly (presumably for self-registering stats -- TODO confirm).
target_link_libraries(ucmmonitor PRIVATE -Wl,--whole-archive monitor_static -Wl,--no-whole-archive)
target_include_directories(ucmmonitor PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cc)

# Install the module at the same path, relative to the source root, as this directory.
file(RELATIVE_PATH INSTALL_REL_PATH
     ${CMAKE_SOURCE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}
)
install(TARGETS ucmmonitor LIBRARY DESTINATION ${INSTALL_REL_PATH} COMPONENT ucm)


--------------------------------------------------------------------------------
/ucm/sparse/gsa/offload_ops/include/thread_safe_queue.h:
--------------------------------------------------------------------------------
 1 | #ifndef THREAD_SAFE_QUEUE_H
 2 | #define THREAD_SAFE_QUEUE_H
 3 | 
 4 | #include <queue>
 5 | #include <mutex>
 6 | #include <condition_variable>
 7 | #include <atomic>
 8 | #include <stdexcept>
 9 | #include <torch/torch.h>
10 | 
11 | struct CopyInfo {
12 |     bool needCalKpre;
13 |     uint32_t layerId;
14 |     std::vector<int32_t> locations;
15 |     torch::Tensor ids;
16 |     torch::Tensor srcTensor;
17 | };
18 | 
/// Queue of CopyInfo items that carries its own mutex, condition variable and stop flag.
/// The member functions are defined out of line, so the behavioural notes below are
/// inferred from the declarations only -- confirm against the implementation.
class ThreadSafeQueue {
public:
    ThreadSafeQueue();
    ~ThreadSafeQueue() = default;

    // Copying is disabled.
    ThreadSafeQueue(const ThreadSafeQueue&) = delete;
    ThreadSafeQueue& operator=(const ThreadSafeQueue&) = delete;

    void push(CopyInfo value);  // takes the item by value
    CopyInfo pop();             // presumably blocks until an item is available or stop() is called -- TODO confirm
    size_t size() const;
    bool empty() const;
    void stop();                // presumably sets m_stopped and wakes waiters -- TODO confirm
    void clear();

private:
    mutable std::mutex m_mutex;  // mutable so the const accessors can lock it
    std::condition_variable m_condVar;
    std::queue<CopyInfo> m_queue;
    std::atomic<bool> m_stopped;
};
40 | 
41 | #endif


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/infra/logger/logger.cpp:
--------------------------------------------------------------------------------
 1 | #include <spdlog/cfg/helpers.h>
 2 | #include <spdlog/details/os.h>
 3 | #include <spdlog/sinks/stdout_color_sinks.h>
 4 | #include "logger.h"
 5 | 
 6 | namespace KVStar {
 7 | 
 8 | static std::mutex g_mutex;
 9 | static std::shared_ptr<spdlog::logger> g_logger = nullptr;
10 | 
11 | std::shared_ptr<spdlog::logger> Logger::Make()
12 | {
13 |     if (g_logger) { return g_logger; }
14 |     std::unique_lock lock(g_mutex);
15 |     if (g_logger) { return g_logger; }
16 |     try {
17 |         const std::string name = "KVSTAR_RETRIEVE";
18 |         const std::string envLevel = name + "_LOGGER_LEVEL";
19 |         g_logger = spdlog::stdout_color_mt(name);
20 |         g_logger->set_pattern("[%Y-%m-%d %H:%M:%S.%f %z] [%n] [%^%L%$] %v [PID: %P, TID: %t] [%s:%#,%!]");
21 |         auto level = spdlog::details::os::getenv(envLevel.c_str());
22 |         if (!level.empty()) { spdlog::cfg::helpers::load_levels(level); }
23 |         return g_logger;
24 |     } catch (...) {
25 |         return spdlog::default_logger();
26 |     }
27 | }
28 | 
29 | }


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/api/kvstar_retrieve/kvstar_retrieve.h:
--------------------------------------------------------------------------------
 1 | #ifndef KVSTAR_RETRIEVE_CLIB_KVSTAR_RETRIEVE_H
 2 | #define KVSTAR_RETRIEVE_CLIB_KVSTAR_RETRIEVE_H
 3 | 
 4 | #include <list>
 5 | #include <string>
 6 | #include <vector>
 7 | #include <numeric> // for std::iota
 8 | #include "retrieve_task/retrieve_task.h"
 9 | #include "retrieve_task/retrieve_task_manager.h"
10 | #include "template/singleton.h"
11 | 
12 | namespace KVStar {
13 | 
/// Parameters passed to Setup().
struct SetupParam {
    std::vector<int> cpuNumaIds;
    std::vector<std::pair<int, int>> bindInfo; // coreId, numaId
    DeviceType deviceType;
    int totalTpSize;
    int localRankId;
    // NOTE(review): threadNum has no matching constructor parameter; presumably it is
    // derived inside the out-of-line constructor -- confirm.
    int threadNum;

    // Defined out of line.
    SetupParam(const std::vector<int>& cpuNumaIds, const std::vector<std::pair<int, int>>& bindInfo,
               const DeviceType deviceType, const int totalTpSize, const int localRankId);

};
26 | 
// Configures the retrieve library from `param`. Returns an int32_t status code
// (presumably 0 on success -- TODO confirm against the implementation).
int32_t Setup(const SetupParam& param);

// Waits on the task identified by `taskId`. Returns an int32_t status code
// (presumably 0 on success -- TODO confirm against the implementation).
int32_t Wait(const size_t taskId);
30 | 
31 | 
32 | } // namespace KVStar
33 | 
34 | 
35 | 
36 | #endif //KVSTAR_RETRIEVE_CLIB_KVSTAR_RETRIEVE_H
37 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_mlvu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_5_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'chunked_prefill_frames': 32,
 8 |   'frame_chunk_size': 64,
 9 |   # KVCache compression
10 |   'kvcache_compression': True,
11 |   'kvcache_compression_kwargs': {
12 |     'compression_method': 'stdvidlkv',
13 |     'dynamic_compression_ratio': True,
14 |     'prompt_guided_compression': True,
15 |     'pos_embed_reforge': False,
16 |     'max_input_length': 16000,
17 |     # Temporal
18 |     'enable_temporal_adaptation': True,
19 |     'temporal_adaptation_ratio': 4,
20 |     # Layer
21 |     'budget_allocation_method': 'adakv',
22 |   },
23 | }
24 | 
25 | 
26 | ### dataset
27 | dataset_name: mlvu
28 | anno_file: dataset/mlvu/mlvu.json
29 | dataloader_num_workers: 4
30 | 
31 | ### data
32 | sample_fps: 2
33 | max_num_frames: 2048
34 | longsize_resolution: 448
35 | 
36 | ### generate
37 | do_sample: false
38 | 
39 | ### output
40 | output_dir: results/qwen25vl_7b_mlvu_f2048_2fps_r448/adaretake-16k
41 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_lvbench.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_5_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'chunked_prefill_frames': 32,
 8 |   'frame_chunk_size': 64,
 9 |   # KVCache compression
10 |   'kvcache_compression': True,
11 |   'kvcache_compression_kwargs': {
12 |     'compression_method': 'stdvidlkv',
13 |     'dynamic_compression_ratio': True,
14 |     'prompt_guided_compression': True,
15 |     'pos_embed_reforge': True,
16 |     'max_input_length': 16000,
17 |     # Temporal
18 |     'enable_temporal_adaptation': True,
19 |     'temporal_adaptation_ratio': 4,
20 |     # Layer
21 |     'budget_allocation_method': 'adakv',
22 |   },
23 | }
24 | 
25 | 
26 | ### dataset
27 | dataset_name: lvbench
28 | anno_file: dataset/lvbench/lvbench.json
29 | dataloader_num_workers: 4
30 | 
31 | ### data
32 | sample_fps: 2
33 | max_num_frames: 2048
34 | longsize_resolution: 448
35 | 
36 | ### generate
37 | do_sample: false
38 | 
39 | ### output
40 | output_dir: results/qwen25vl_7b_lvbench_f2048_2fps_r448/adaretake-16k_reforge
41 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_videomme.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_5_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'chunked_prefill_frames': 32,
 8 |   'frame_chunk_size': 64,
 9 |   # KVCache compression
10 |   'kvcache_compression': True,
11 |   'kvcache_compression_kwargs': {
12 |     'compression_method': 'stdvidlkv',
13 |     'dynamic_compression_ratio': True,
14 |     'prompt_guided_compression': True,
15 |     'pos_embed_reforge': True,
16 |     'max_input_length': 16000,
17 |     # Temporal
18 |     'enable_temporal_adaptation': True,
19 |     'temporal_adaptation_ratio': 4,
20 |     # Layer
21 |     'budget_allocation_method': 'adakv',
22 |   },
23 | }
24 | 
25 | 
26 | ### dataset
27 | dataset_name: videomme
28 | anno_file: dataset/video_mme/video_mme.json
29 | dataloader_num_workers: 4
30 | 
31 | ### data
32 | sample_fps: 4
33 | max_num_frames: 2048
34 | longsize_resolution: 448
35 | 
36 | ### generate
37 | do_sample: false
38 | 
39 | ### output
40 | output_dir: results/qwen25vl_7b_videomme_f2048_4fps_r448/adaretake-16k_reforge
41 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_vl/retake_qwen2-vl_mlvu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 128,
 8 |   'chunked_prefill_frames': 32,
 9 |   # Keyframe compression
10 |   'visual_compression': True,
11 |   'visual_compression_kwargs': {
12 |     'compression_ratio': 1.0,
13 |     'compression_method': 'Keyframe',
14 |     'patch_sync': False,
15 |     'return_keyframe_mask': True
16 |   },
17 |   # KVCache compression
18 |   'kvcache_compression': True,
19 |   'kvcache_compression_kwargs': {
20 |     'dynamic_compression_ratio': True,
21 |     'compression_method': 'pivotkv',
22 |     'pos_embed_reforge': True,
23 |     'max_input_length': 32000
24 |   },
25 | }
26 | 
27 | ### dataset
28 | dataset_name: mlvu
29 | anno_file: dataset/mlvu/mlvu.json
30 | dataloader_num_workers: 2
31 | 
32 | ### data
33 | sample_fps: 4
34 | max_num_frames: 2048
35 | longsize_resolution: 448
36 | 
37 | ### generate
38 | do_sample: false
39 | 
40 | ### output
41 | output_dir: results/qwen2vl_7b_mlvu_f2048_4fps_r448/retake_dp1-async_pivot-32k
42 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/llava_video/retake_llava-video_lvbench.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: llava_video
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 32,
 8 |   'chunked_prefill_frames': 32,
 9 |   # Keyframe compression
10 |   'visual_compression': True,
11 |   'visual_compression_kwargs': {
12 |     'compression_ratio': 1.0,
13 |     'compression_method': 'Keyframe',
14 |     'patch_sync': False,
15 |     'return_keyframe_mask': True
16 |   },
17 |   # KVCache compression
18 |   'kvcache_compression': True,
19 |   'kvcache_compression_kwargs': {
20 |     'dynamic_compression_ratio': True,
21 |     'compression_method': 'pivotkv',
22 |     'pos_embed_reforge': True,
23 |     'max_input_length': 40000
24 |   },
25 | }
26 | 
27 | ### dataset
28 | dataset_name: lvbench
29 | anno_file: dataset/lvbench/lvbench.json
30 | dataloader_num_workers: 4
31 | 
32 | ### data
33 | sample_fps: 2
34 | max_num_frames: 1024
35 | longsize_resolution: 682
36 | 
37 | ### generate
38 | do_sample: false
39 | 
40 | ### output
41 | output_dir: results/llava-video_f1024_2fps_r682/retake_dp1-async_pivot-40k
42 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/llava_video/retake_llava-video_mlvu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: llava_video
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 32,
 8 |   'chunked_prefill_frames': 32,
 9 |   # Keyframe compression
10 |   'visual_compression': True,
11 |   'visual_compression_kwargs': {
12 |     'compression_ratio': 1.0,
13 |     'compression_method': 'Keyframe',
14 |     'patch_sync': False,
15 |     'return_keyframe_mask': True
16 |   },
17 |   # KVCache compression
18 |   'kvcache_compression': True,
19 |   'kvcache_compression_kwargs': {
20 |     'dynamic_compression_ratio': True,
21 |     'compression_method': 'pivotkv',
22 |     'pos_embed_reforge': True,
23 |     'max_input_length': 40000
24 |   },
25 | }
26 | 
27 | ### dataset
28 | dataset_name: mlvu
29 | anno_file: dataset/mlvu/mlvu.json
30 | dataloader_num_workers: 4
31 | 
32 | ### data
33 | sample_fps: 2
34 | max_num_frames: 1024
35 | longsize_resolution: 682
36 | 
37 | ### generate
38 | do_sample: false
39 | 
40 | ### output
41 | output_dir: results/llava-video_rope4_mlvu_f1024_2fps_r682/retake_dp1-async_pivot-40k
42 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_vl/retake_qwen2-vl_lvbench.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 128,
 8 |   'chunked_prefill_frames': 32,
 9 |   # Keyframe compression
10 |   'visual_compression': True,
11 |   'visual_compression_kwargs': {
12 |     'compression_ratio': 1.0,
13 |     'compression_method': 'Keyframe',
14 |     'patch_sync': False,
15 |     'return_keyframe_mask': True
16 |   },
17 |   # KVCache compression
18 |   'kvcache_compression': True,
19 |   'kvcache_compression_kwargs': {
20 |     'dynamic_compression_ratio': True,
21 |     'compression_method': 'pivotkv',
22 |     'pos_embed_reforge': True,
23 |     'max_input_length': 32000
24 |   },
25 | }
26 | 
27 | ### dataset
28 | dataset_name: lvbench
29 | anno_file: dataset/lvbench/lvbench.json
30 | dataloader_num_workers: 2
31 | 
32 | ### data
33 | sample_fps: 2
34 | max_num_frames: 2048
35 | longsize_resolution: 448
36 | 
37 | ### generate
38 | do_sample: false
39 | 
40 | ### output
41 | output_dir: results/qwen2vl_7b_lvbench_f2048_2fps_r448/retake_dp1-async_pivot-32k
42 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_runner.cpp:
--------------------------------------------------------------------------------
 1 | #include "retrieve_task_runner.h"
 2 | #include <functional>
 3 | #include <map>
 4 | #include <thread>
 5 | #include <chrono>
 6 | 
 7 | #include "logger/logger.h"
 8 | #include "memory/memory.h"
 9 | #include "template/singleton.h"
10 | #include "simd_compute_kernel.h"
11 | 
12 | namespace KVStar {
13 | 
14 | // Runs one retrieve task synchronously through KVStar::Execute().
15 | // A std::exception thrown by the computation is logged and recorded on `result`
16 | // (error message plus FAILURE status); the function returns Status::OK() either way.
17 | Status RetrieveTaskRunner::Run(const RetrieveTask& task, TaskResult& result)
18 | {
19 |     try {
20 |         KVSTAR_DEBUG("Task {} starting pure C++ computation.", task.allocTaskId);
21 | 
22 |         KVStar::Execute(task, result);
23 | 
24 |         KVSTAR_DEBUG("Task {} pure C++ computation finished successfully.", task.allocTaskId);
25 |     } catch (const std::exception& ex) {
26 |         KVSTAR_ERROR("Task {} failed during computation in Runner. Error: {}", task.allocTaskId, ex.what());
27 | 
28 |         // Hold result.mtx while writing the message; the status is stored last with release ordering.
29 |         std::lock_guard<std::mutex> guard(result.mtx);
30 |         result.errorMessage = ex.what();
31 |         result.status.store(TaskStatus::FAILURE, std::memory_order_release);
32 |     }
33 | 
34 |     // Failures are reported through `result`, not through the return value.
35 |     return Status::OK();
36 | }
37 | 
38 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/test/config.yaml:
--------------------------------------------------------------------------------
 1 | reports:
 2 |   base_dir: "results/reports"
 3 |   use_timestamp: true
 4 |   directory_prefix: "pytest"
 5 |   html: # pytest-html
 6 |     enabled: false
 7 |     filename: "report.html"
 8 |     title: "UCM Pytest Test Report"
 9 | 
10 | database:
11 |   backup: "results/"
12 |   enabled: true
13 |   host: "127.0.0.1"
14 |   port: 5432
15 |   name: "ucm_test"
16 |   user: "postgres"
17 |   password: "123456"
18 | 
19 | models:
20 |   ip_ports: ""
21 |   tokenizer_path: ""
22 |   served_model_name: ""
23 |   payload: ''
24 |   enable_clear_hbm: false
25 | 
26 | # LLM Connection Configuration
27 | llm_connection:
28 |   model: ""
29 |   server_url: ""
30 |   tokenizer_path: ""
31 |   stream: true    # stream output
32 |   ignore_eos: true    # Ignore the returned terminator
33 |   timeout: 180    # request time out
34 | 
35 | # Environment Pre-Check Configuration
36 | Env_preCheck:
37 |   master_ip: 192.168.0.1
38 |   worker_ip:
39 |   ascend_rt_visible_devices: ""
40 |   node_num:
41 |   model_path: ""
42 |   hf_model_name: ""
43 |   middle_page: ""
44 |   expected_embed_bandwidth: 10
45 |   expected_fetch_bandwidth: 10
46 |   kvCache_block_number: 1024
47 |   storage_backends: [""]


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/llava_video/retake_llava-video_videomme.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: llava_video
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'frame_chunk_size': 32,
 8 |   'chunked_prefill_frames': 32,
 9 |   # Keyframe compression
10 |   'visual_compression': True,
11 |   'visual_compression_kwargs': {
12 |     'compression_ratio': 1.0,
13 |     'compression_method': 'Keyframe',
14 |     'patch_sync': False,
15 |     'return_keyframe_mask': True
16 |   },
17 |   # KVCache compression
18 |   'kvcache_compression': True,
19 |   'kvcache_compression_kwargs': {
20 |     'dynamic_compression_ratio': True,
21 |     'compression_method': 'pivotkv',
22 |     'pos_embed_reforge': True,
23 |     'max_input_length': 40000
24 |   },
25 | }
26 | 
27 | ### dataset
28 | dataset_name: videomme
29 | anno_file: dataset/video_mme/video_mme.json
30 | dataloader_num_workers: 4
31 | 
32 | ### data
33 | sample_fps: 2
34 | max_num_frames: 1024
35 | longsize_resolution: 682
36 | 
37 | ### generate
38 | do_sample: false
39 | 
40 | ### output
41 | output_dir: results/llava-video_rope4_video_mme_f1024_2fps_r682/retake_dp1-async_pivot-40k
42 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/configs/qwen2_5_vl/flexreduc_qwen2-5-vl_longvideobench.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name: qwen2_5_vl
 3 | method: retake
 4 | scaling_factor: 4
 5 | attn_implementation: "flash_attention_2"
 6 | longvideo_kwargs: {
 7 |   'chunked_prefill_frames': 32,
 8 |   'frame_chunk_size': 64,
 9 |   # KVCache compression
10 |   'kvcache_compression': True,
11 |   'kvcache_compression_kwargs': {
12 |     'compression_method': 'stdvidlkv',
13 |     'dynamic_compression_ratio': True,
14 |     'prompt_guided_compression': True,
15 |     'max_guide_length': 152,
16 |     'pos_embed_reforge': False,
17 |     'max_input_length': 16000,
18 |     # Temporal
19 |     'enable_temporal_adaptation': True,
20 |     'temporal_adaptation_ratio': 4,
21 |     # Layer
22 |     'budget_allocation_method': 'adakv',
23 |   },
24 | }
25 | 
26 | 
27 | ### dataset
28 | dataset_name: longvideobench
29 | anno_file: dataset/longvideobench/longvideobench_val.json
30 | dataloader_num_workers: 4
31 | 
32 | ### data
33 | sample_fps: 2
34 | max_num_frames: 2048
35 | longsize_resolution: 448
36 | 
37 | ### generate
38 | do_sample: false
39 | 
40 | ### output
41 | output_dir: results/qwen25vl_7b_longvideobench_f2048_2fps_r448/adaretake-16k
42 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/maca/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(CUDA_ROOT "/opt/maca/tools/cu-bridge" CACHE PATH "Path to WCUDA root directory")
 2 | set(CMAKE_CUDA_COMPILER ${CUDA_ROOT}/bin/cucc) # compiler is chosen before enable_language(CUDA) below
 3 | list(APPEND CMAKE_MODULE_PATH "${CUDA_ROOT}/cmake_module/maca")
 4 | enable_language(CUDA)
 5 | add_library(kernel OBJECT maca_sm_kernel.cu) # MACA-specific kernel, built as an OBJECT library
 6 | target_compile_options(kernel PRIVATE
 7 |     -Wall -fPIC
 8 |     -std=c++17
 9 | )
10 | add_library(trans STATIC # sources are shared with the CUDA backend in ../cuda
11 |     ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_device.cc
12 |     ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_buffer.cc
13 |     ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_stream.cc
14 |     ${CMAKE_CURRENT_LIST_DIR}/../cuda/cuda_sm_stream.cc
15 | )
16 | # Imported runtime target. NOTE(review): it is named cudart but IMPORTED_LOCATION is lib/libcuda.so, while trans links from lib64 -- confirm both are intended.
17 | add_library(WCUDA::cudart UNKNOWN IMPORTED)
18 | set_target_properties(WCUDA::cudart PROPERTIES
19 | 	INTERFACE_INCLUDE_DIRECTORIES "${CUDA_ROOT}/include"
20 | 	IMPORTED_LOCATION "${CUDA_ROOT}/lib/libcuda.so"
21 | )
22 | target_include_directories(WCUDA::cudart INTERFACE
23 | 	/opt/maca/include
24 | 	/opt/maca/include/mcr
25 | )
26 | # trans exposes the CUDA_ROOT headers and links fmt, the imported runtime target and the kernel objects.
27 | target_include_directories(trans PUBLIC ${CUDA_ROOT}/include)
28 | target_link_directories(trans PUBLIC ${CUDA_ROOT}/lib64)
29 | target_link_libraries(trans PUBLIC
30 |     fmt
31 |     WCUDA::cudart
32 |     kernel
33 | )
34 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/offload_ops/src/thread_safe_queue.cpp:
--------------------------------------------------------------------------------
 1 | #include "thread_safe_queue.h"
 2 | 
 3 | ThreadSafeQueue::ThreadSafeQueue() : m_stopped(false) {}
 4 | // Moves `value` into the queue and wakes one thread waiting in pop().
 5 | void ThreadSafeQueue::push(CopyInfo value) {
 6 |     std::lock_guard<std::mutex> guard(m_mutex);
 7 |     m_queue.push(std::move(value));
 8 |     m_condVar.notify_one();  // signalled while the mutex is still held
 9 | }
10 | // Blocks until an element is queued or stop() was called, then returns the front element.
11 | // Stopped and empty: returns a value-initialized CopyInfo instead of calling front() on an empty queue (UB).
12 | CopyInfo ThreadSafeQueue::pop() {
13 |     std::unique_lock<std::mutex> lock(m_mutex);
14 |     m_condVar.wait(lock, [this] { return !m_queue.empty() || m_stopped; });
15 |     if (m_queue.empty()) { return CopyInfo{}; }  // woken by stop(): nothing to dequeue
16 |     CopyInfo value = std::move(m_queue.front());
17 |     m_queue.pop();
18 |     return value;
19 | }
20 | // Returns the number of queued elements, read while holding the mutex.
21 | size_t ThreadSafeQueue::size() const {
22 |     std::lock_guard<std::mutex> guard(m_mutex);
23 |     return m_queue.size();
24 | }
25 | // Returns true when no element is queued, read while holding the mutex.
26 | bool ThreadSafeQueue::empty() const {
27 |     std::lock_guard<std::mutex> guard(m_mutex);
28 |     return m_queue.empty();
29 | }
30 | // Sets the stopped flag and wakes every thread waiting in pop().
31 | void ThreadSafeQueue::stop() {
32 |     std::lock_guard<std::mutex> guard(m_mutex);
33 |     m_stopped = true;
34 |     m_condVar.notify_all();
35 | }
36 | // Discards every queued element; the stopped flag is not modified.
37 | void ThreadSafeQueue::clear() {
38 |     std::lock_guard<std::mutex> guard(m_mutex);
39 |     for (; !m_queue.empty(); m_queue.pop()) {
40 |         // intentionally empty: the loop header removes one element per iteration
41 |     }
42 | }


--------------------------------------------------------------------------------
/ucm/sparse/gsa/offload_ops/src/pybinds.cpp:
--------------------------------------------------------------------------------
 1 | #pragma GCC diagnostic push
 2 | #include <torch/extension.h>
 3 | #include <pybind11/pybind11.h>
 4 | #include <pybind11/stl.h>
 5 | #include <pybind11/functional.h>
 6 | #pragma GCC diagnostic pop
 7 | #include "cal_kpre_and_topk.h"
 8 | 
 9 | // Python extension module `gsa_offload_ops`: binds CalKpreAndTopk, two of its data members and its methods.
10 | PYBIND11_MODULE(gsa_offload_ops, m) {
11 |     pybind11::class_<CalKpreAndTopk>(m, "CalKpreAndTopk")
12 |         .def(pybind11::init<int, int, int, int, int>())
13 |         .def_readwrite("k_cache", &CalKpreAndTopk::m_kCache)
14 |         .def_readwrite("q_cache", &CalKpreAndTopk::m_qCache)
15 |         .def("set_kpre_method_param", &CalKpreAndTopk::SetKpreMethodParam)
16 |         .def("set_kpre_cache", &CalKpreAndTopk::SetKpreCache)
17 |         .def("set_topk_cache", &CalKpreAndTopk::SetTopkCache)
18 |         .def("set_common_param", &CalKpreAndTopk::SetCommonParam)
19 |         .def("set_topk_param", &CalKpreAndTopk::SetTopkParam)
20 |         .def("set_kpre_param", &CalKpreAndTopk::SetKpreParam)
21 |         .def("set_kpre_data_ready", &CalKpreAndTopk::SetKpreDataReady)
22 |         .def("set_topk_data_ready", &CalKpreAndTopk::SetTopkDataReady)
23 |         .def("add_copy_req", &CalKpreAndTopk::AddCopyReq)
24 |         .def("is_calculate_finish", &CalKpreAndTopk::IsCalculateFinish);
25 | }
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_manager.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_MANAGER_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_MANAGER_H
 3 | 
 4 | #include <memory>
 5 | #include <unordered_map>
 6 | #include <vector>
 7 | #include "retrieve_task_queue.h"
 8 | #include "task_result.h"
 9 | 
10 | namespace KVStar {
11 | class RetrieveTaskManager { // Owns the RetrieveTaskQueue instances (_queues) and the per-task waiter/result maps.
12 | public:
13 |     Status Setup(const size_t threadNum, const std::vector<std::pair<int, int>>& bindInfo);
14 |     Status SubmitSingleTask(RetrieveTask&&task, size_t &taskId);
15 |     // NOTE(review): presumably returns, via `result`, the TaskResult stored in _resultMap for taskId -- confirm in the .cpp.
16 |     Status GetResult(size_t taskId, std::shared_ptr<TaskResult>& result);
17 |     // NOTE(review): presumably blocks until the task identified by taskId has finished -- confirm in the .cpp.
18 |     Status Wait(const size_t taskId);
19 | private:
20 |     void Dispatch();
21 | 
22 | private:
23 |     std::mutex _mutex;
24 |     RetrieveTaskSet _failureSet; // presumably the set handed to every queue through RetrieveTaskQueue::Setup() -- confirm
25 |     std::unordered_map<size_t, std::shared_ptr<RetrieveTaskWaiter>> _waiters;
26 |     // presumably keyed by task id, like _waiters -- confirm
27 |     std::unordered_map<size_t, std::shared_ptr<TaskResult>> _resultMap;
28 |     // Queues are held by unique_ptr, so this manager owns them.
29 |     std::vector<std::unique_ptr<RetrieveTaskQueue>> _queues;
30 |     size_t _lastTimeScheduledQueueIdx{0}; // NOTE(review): name suggests a round-robin cursor into _queues -- confirm
31 |     size_t _taskIdSeed{0}; // NOTE(review): presumably the source of new task ids -- confirm
32 | 
33 | };
34 | 
35 | }
36 | 
37 | 
38 | 
39 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_MANAGER_H
40 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_queue.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_QUEUE_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_QUEUE_H
 3 | 
 4 | #include <condition_variable>
 5 | #include <future>
 6 | #include <list>
 7 | #include <mutex>
 8 | #include <thread>
 9 | #include "status/status.h"
10 | 
11 | #include "retrieve_task.h"
12 | #include "retrieve_task_set.h"
13 | #include "task_result.h"
14 | 
15 | namespace KVStar {
16 | struct WorkItem { // One unit of work accepted by RetrieveTaskQueue::Push().
17 |     RetrieveTask task; // the task itself, stored by value
18 |     std::shared_ptr<TaskResult> result; // result record shared with whoever else holds the pointer
19 | };
20 | 
21 | class RetrieveTaskQueue { // A task list (_taskQ) together with the worker thread (_worker) declared to service it.
22 | public:
23 |     ~RetrieveTaskQueue();
24 |     Status Setup(const int numaId, const int bindCoreId, RetrieveTaskSet* failureSet); // failureSet comes from the manager and is shared by all queues
25 |     void Push(WorkItem&& item);
26 | 
27 | private:
28 |     void Worker(const int numaId, const int bindCoreId, std::promise<Status>& started);
29 | 
30 | private:
31 |     std::list<WorkItem> _taskQ;
32 |     std::mutex _mutex;
33 |     std::condition_variable _cv;
34 |     std::thread _worker;
35 |     bool _running{false};
36 |     RetrieveTaskSet* _failureSet{nullptr}; // non-owning; initialized so it is never indeterminate before Setup()
37 | 
38 | 
39 | };
40 | }
41 | 
42 | 
43 | 
44 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_QUEUE_H


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!--  Thanks for sending a pull request!
 2 | 
 3 | BEFORE SUBMITTING, PLEASE READ OUR OFFICIAL WEBSITE.
 4 | 
 5 | -->
 6 | 
 7 | # Purpose
 8 | 
 9 | What this PR does / why we need it?
10 | <!--
11 | - Please clarify what changes you are proposing. The purpose of this section is to outline the changes and how this PR fixes the issue.
12 | If possible, please consider writing useful notes for better and faster reviews in your PR.
13 | 
14 | - Please clarify why the changes are needed. For instance, the use case and bug description.
15 | 
16 | - Fixes #
17 | -->
18 | 
19 | # Modifications 
20 | 
21 | Does this PR introduce _any_ user-facing change?
22 | <!--
23 | Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes.
24 | Documentation-only updates are not considered user-facing changes.
25 | -->
26 | 
27 | # Test
28 | 
29 | How was this patch tested?
30 | <!--
31 | CI passed with newly added/existing tests.
32 | If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future.
33 | If tests were not added, please describe why they were not added and/or why it was difficult to add.
34 | -->


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/scripts/submission/prepare_lvbench_submission.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import pandas as pd
 5 | 
 6 | 
 7 | def load_jsonl(file):
 8 |     return [json.loads(line) for line in open(file, "r").readlines()]
 9 | 
10 | 
11 | predict_result_dir = "./results/path_to_results"  # directory expected to contain eval_results.csv; looks like a placeholder to edit
12 | output_file = "./ReTaKe_LVBench_submission.json"  # destination of the generated submission JSON
13 | # Not referenced anywhere else in this script.
14 | LVBENCH_ANNO_FILE = "./dataset/lvbench/lvbench.json"
15 | 
16 | 
17 | ################ DO NOT CHANGE ################
18 | def create_submission_file(predict_result_dir, output_file):  # summarize eval_results.csv into a JSON file
19 |     results_df = pd.read_csv(os.path.join(predict_result_dir, "eval_results.csv"))
20 |     # Only the first CSV row is used; each score is divided by 100 (presumably percent -> fraction).
21 |     video_id2results = {}
22 |     res = results_df.loc[0]
23 |     video_id2results["KIR"] = res["key information retrieval"] / 100
24 |     video_id2results["EU"] = res["event understanding"] / 100
25 |     video_id2results["Sum"] = res["summarization"] / 100
26 |     video_id2results["ER"] = res["entity recognition"] / 100
27 |     video_id2results["Rea"] = res["reasoning"] / 100
28 |     video_id2results["TG"] = res["temporal grounding"] / 100
29 |     video_id2results["Overall"] = res["overall"] / 100
30 |     # Write the seven scores to output_file as indented JSON.
31 |     with open(output_file, "w") as f:
32 |         json.dump(video_id2results, f, indent=2)
33 | 
34 | 
35 | create_submission_file(predict_result_dir, output_file)  # runs on import as well: there is no __main__ guard
36 | 


--------------------------------------------------------------------------------
/ucm/shared/infra/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | file(GLOB_RECURSE UCMINFRA_STATUS_SOURCE_FILES "status/*.*") # status: every file under status/
 2 | add_library(infra_status OBJECT ${UCMINFRA_STATUS_SOURCE_FILES})
 3 | target_include_directories(infra_status PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 4 | target_link_libraries(infra_status PUBLIC fmt)
 5 | # logger: top-level files in logger/ plus the *.cc files under logger/${LOGGER_BACKEND}/ (variable set outside this file)
 6 | file(GLOB UCMINFRA_LOGGER_SOURCE_FILES "logger/*.*")
 7 | file(GLOB_RECURSE UCMINFRA_LOGGER_DETAIL_SOURCE_FILES "logger/${LOGGER_BACKEND}/*.cc")
 8 | add_library(infra_logger OBJECT ${UCMINFRA_LOGGER_SOURCE_FILES} ${UCMINFRA_LOGGER_DETAIL_SOURCE_FILES})
 9 | target_include_directories(infra_logger PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
10 | target_link_libraries(infra_logger PUBLIC fmt spdlog)
11 | # template: every file under template/
12 | file(GLOB_RECURSE UCMINFRA_TEMPLATE_SOURCE_FILES "template/*.*")
13 | add_library(infra_template OBJECT ${UCMINFRA_TEMPLATE_SOURCE_FILES})
14 | target_include_directories(infra_template PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
15 | # thread: every file under thread/
16 | file(GLOB_RECURSE UCMINFRA_THREAD_SOURCE_FILES "thread/*.*")
17 | add_library(infra_thread OBJECT ${UCMINFRA_THREAD_SOURCE_FILES})
18 | target_include_directories(infra_thread PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
19 | # time: every file under time/
20 | file(GLOB_RECURSE UCMINFRA_TIME_SOURCE_FILES "time/*.*")
21 | add_library(infra_time OBJECT ${UCMINFRA_TIME_SOURCE_FILES})
22 | target_include_directories(infra_time PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
23 | 


--------------------------------------------------------------------------------
/.github/workflows/e2e_test.yml:
--------------------------------------------------------------------------------
 1 | name: offline_inference_test
 2 | on: 
 3 |     workflow_dispatch:
 4 | 
 5 | jobs:
 6 |   offline-inference:
 7 |     runs-on: arc-runner-ucm       
 8 |     steps:
 9 |       - uses: actions/checkout@v4
10 |       - run: nvidia-smi
11 |       - name: Run offline_inference in container
12 |         run: |
13 |           docker run --rm \
14 |             --gpus all \
15 |             -v ${{ github.workspace }}:/workspace/unified-cache-management \
16 |             -v /home_116/models/Qwen2.5-1.5B-Instruct:/home/models/Qwen2.5-1.5B-Instruct \
17 |             -w /workspace/unified-cache-management \
18 |             --entrypoint /bin/bash \
19 |             vllm/vllm-openai:v0.9.2 \
20 |             -c "
21 |               set -euo pipefail
22 |               export PLATFORM=cuda
23 |               export MODEL_PATH=/home/models/Qwen2.5-1.5B-Instruct
24 |               pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
25 |               pip install -v -e . --no-build-isolation
26 |               cd \$(pip show vllm | grep Location | awk '{print \$2}') &&
27 |               git apply /workspace/unified-cache-management/ucm/integration/vllm/patch/0.9.2/vllm-adapt.patch
28 |               cd /workspace/unified-cache-management
29 |               python3 examples/offline_inference.py
30 |             "


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/110-user-story.yml:
--------------------------------------------------------------------------------
 1 | name: 📚 User Story
 2 | description: Apply for a user story to be displayed on the ucm official website
 3 | title: "[User Story]: "
 4 | labels: ["user-story"]
 5 | 
 6 | body:
 7 | - type: textarea
 8 |   attributes:
 9 |     label: 📚 Title
10 |     description: >
11 |       A clear title about what your user story is about.
12 |   validations:
13 |     required: true
14 | - type: textarea
15 |   attributes:
16 |     label: About / Introduction
17 |     description: >
18 |       A brief introduction about the background of your use case, like your scenario, hardware size etc.
19 | - type: textarea
20 |   attributes:
21 |     label: Business Challenges
22 |     description: >
23 |       Tell us what kind of challenges you faced in this user story.
24 | - type: textarea
25 |   attributes:
26 |     label: Solving challenges with ucm and benefits
27 |     description: >
28 |       Tell us how unifiedcache helped you overcome the challenges, including details like how you use it, what version you used, hardware info, etc. And what kind of benefit do you get from using unifiedcache
29 | - type: textarea
30 |   attributes:
31 |     label: Extra Info
32 |     description: >
33 |       Any extra information you want to include in this story
34 | - type: markdown
35 |   attributes:
36 |     value: >
37 |       Thanks for contributing 🎉!
38 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/api/kvstar_retrieve/kvstar_retrieve.cpp:
--------------------------------------------------------------------------------
 1 | #include <spdlog/fmt/ranges.h>
 2 | 
 3 | #include "kvstar_retrieve.h"
 4 | #include "status/status.h"
 5 | #include "logger/logger.h"
 6 | #include "template/singleton.h"
 7 | #include "retrieve_task/retrieve_task_manager.h"
 8 | 
 9 | namespace KVStar {
10 | // Copies each argument into the same-named member; threadNum is derived from the number of bindInfo entries.
11 | SetupParam::SetupParam(const std::vector<int>& cpuNumaIds, const std::vector<std::pair<int, int>>& bindInfo,
12 |                        const DeviceType deviceType, const int totalTpSize, const int localRankId)
13 |     : cpuNumaIds{cpuNumaIds}, bindInfo{bindInfo}, deviceType{deviceType}, totalTpSize{totalTpSize},
14 |       localRankId{localRankId}, threadNum{static_cast<int>(this->bindInfo.size())}
15 | {
16 |     KVSTAR_DEBUG("Successfully configured. Total threads = {}.", this->threadNum);
17 | }
18 | 
19 | int32_t Setup(const SetupParam& param)
20 | {
21 | 
22 |     auto status = Singleton<RetrieveTaskManager>::Instance()->Setup(param.threadNum, param.bindInfo);
23 |     if (status.Failure()) {
24 |         KVSTAR_ERROR("Failed({}) to setup RetrieveTaskManager.", status);
25 |         return status.Underlying();
26 |     }
27 |     KVSTAR_DEBUG("Setup RetrieveTaskManager success.");
28 | 
29 |     return Status::OK().Underlying();
30 | }
31 | 
32 | int32_t Wait(const size_t taskId) {
33 |     return Singleton<RetrieveTaskManager>::Instance()->Wait(taskId).Underlying();
34 | }
35 | 
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/500-feature-request.yml:
--------------------------------------------------------------------------------
 1 | name: 🚀 Feature request
 2 | description: Submit a proposal/request for a new ucm feature
 3 | title: "[Feature]: "
 4 | labels: ["feature request"]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc).
11 | - type: textarea
12 |   attributes:
13 |     label: 🚀 The feature, motivation and pitch
14 |     description: >
15 |       A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too.
16 |   validations:
17 |     required: true
18 | - type: textarea
19 |   attributes:
20 |     label: Alternatives
21 |     description: >
22 |       A description of any alternative solutions or features you've considered, if any.
23 | - type: textarea
24 |   attributes:
25 |     label: Additional context
26 |     description: >
27 |       Add any other context or screenshots about the feature request.
28 | - type: markdown
29 |   attributes:
30 |     value: >
31 |       Thanks for contributing 🎉!
32 | 


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task_set.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_SET_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_SET_H
 3 | 
 4 | #include <algorithm>
 5 | #include <list>
 6 | #include <mutex>
 7 | #include <shared_mutex>
 8 | 
 9 | namespace KVStar {
/**
 * Collection of task ids spread over a fixed number of buckets.
 *
 * An id is mapped to bucket `id % nBucket`; every bucket is a std::list
 * protected by its own std::shared_mutex, so operations on ids that land in
 * different buckets do not contend on the same lock.
 *
 * Insert does not filter duplicates: inserting the same id twice stores it
 * twice, and Remove erases every stored occurrence of the id.
 */
class RetrieveTaskSet {
    static constexpr size_t nBucket = 8192;
public:
    /** Appends `id` to its bucket while holding that bucket's exclusive lock. */
    void Insert(const size_t id)
    {
        const auto idx = Hash(id);
        std::unique_lock<std::shared_mutex> lk(this->_mutexes[idx]);
        this->_buckets[idx].push_back(id);
    }

    /**
     * Reports whether `id` is currently stored.
     *
     * Takes only a shared lock on the bucket and does not modify the set, so
     * it is usable through a const reference.
     */
    bool Exist(const size_t id) const
    {
        const auto idx = Hash(id);
        std::shared_lock<std::shared_mutex> lk(this->_mutexes[idx]);
        const auto& bucket = this->_buckets[idx];
        return std::find(bucket.begin(), bucket.end(), id) != bucket.end();
    }

    /** Erases every occurrence of `id` from its bucket under the exclusive lock. */
    void Remove(const size_t id)
    {
        const auto idx = Hash(id);
        std::unique_lock<std::shared_mutex> lk(this->_mutexes[idx]);
        this->_buckets[idx].remove(id);
    }

private:
    /** Bucket index for `id`; depends on no instance state. */
    static constexpr size_t Hash(const size_t id) noexcept { return id % nBucket; }

private:
    // mutable: locking is required even for the read-only Exist().
    mutable std::shared_mutex _mutexes[nBucket];
    std::list<size_t> _buckets[nBucket];

};
41 | 
42 | }
43 | 
44 | 
45 | 
46 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_SET_H


--------------------------------------------------------------------------------
/ucm/sparse/kvstar/retrieve/core/domain/retrieve_task/retrieve_task.h:
--------------------------------------------------------------------------------
 1 | #ifndef UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_H
 2 | #define UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_H
 3 | 
 4 | #include <cstdint>
 5 | #include <string>
 6 | #include <vector>
 7 | #include <optional>
 8 | #include "retrieve_task_waiter.h"
 9 | #include "computation_task.h"
10 | 
11 | namespace KVStar {
12 | 
/**
 * Device kinds referenced by the retrieve code.
 * Values are consecutive starting at 0; TYPE_END follows the last device value.
 */
enum DeviceType {
    CPU = 0,
    NPU = 1,
    GPU = 2,
    TYPE_END = 3
};
19 | 
20 | struct RetrieveTask {
21 |     PlainTensor queryGroup;
22 |     PlainTensor blkRepre;
23 |     std::optional<PlainTensor> dPrunedIndex;
24 | 
25 |     int topK;
26 |     int reqId;
27 |     DeviceType deviceType;
28 |     size_t allocTaskId;
29 |     std::shared_ptr<RetrieveTaskWaiter> waiter;
30 | 
31 |     RetrieveTask(
32 |             PlainTensor qGroup, PlainTensor bRepre, std::optional<PlainTensor> pIndex,
33 |             int tK, int rId, DeviceType devType
34 |     ) : queryGroup(std::move(qGroup)),
35 |         blkRepre(std::move(bRepre)),
36 |         dPrunedIndex(std::move(pIndex)),
37 |         topK(tK),
38 |         reqId(rId),
39 |         deviceType(devType),
40 |         allocTaskId(0) {}
41 | 
42 |     RetrieveTask() = default;
43 |     RetrieveTask(RetrieveTask&& other) noexcept = default;
44 |     RetrieveTask& operator=(RetrieveTask&& other) noexcept = default;
45 | };
46 | 
47 | }
48 | 
49 | #endif //UCM_SPARSE_KVSTAR_RETRIEVE_RETRIEVE_TASK_H


--------------------------------------------------------------------------------
/examples/ucm_config_example.yaml:
--------------------------------------------------------------------------------
 1 | # UCM Configuration File Example
 2 | # 
 3 | # This file demonstrates how to configure UCM using YAML.
 4 | # You can use this config file by setting the path to this file in kv_connector_extra_config in launch script or command line like this:
 5 | # kv_connector_extra_config={"UCM_CONFIG_FILE": "/workspace/unified-cache-management/examples/ucm_config_example.yaml"}
 6 | #
 7 | # Alternatively, you can still use kv_connector_extra_config in KVTransferConfig
 8 | # for backward compatibility.
 9 | 
10 | # Connector name (e.g., "UcmNfsStore", "UcmDramStore")
11 | ucm_connectors:
12 |   - ucm_connector_name: "UcmNfsStore"
13 |     ucm_connector_config:
14 |       storage_backends: "/mnt/test"
15 |       use_direct: false
16 | 
17 | load_only_first_rank: false
18 | 
19 | # Enable UCM metrics so they can be monitored online via Grafana and Prometheus.
20 | # metrics_config_path: "/workspace/unified-cache-management/examples/metrics/metrics_configs.yaml"
21 | 
22 | # Sparse attention configuration
23 | # Format 1: Dictionary format (for methods like ESA, KvComp)
24 | # ucm_sparse_config:
25 | #   ESA:
26 | #     init_window_sz: 1
27 | #     local_window_sz: 2
28 | #     min_blocks: 4
29 | #     sparse_ratio: 0.3
30 | #     retrieval_stride: 5
31 |   # Or for GSA:
32 |   # GSA: {}
33 | 
34 | 
35 | # Whether to use layerwise loading/saving (optional, default: True for UCMConnector)
36 | # use_layerwise: true
37 | # hit_ratio: 0.9
38 | 
39 | 


--------------------------------------------------------------------------------
/ucm/store/detail/task/task_set.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TASK_SET_H
25 | #define UNIFIEDCACHE_TASK_SET_H
26 | 
27 | #include "template/hashset.h"
28 | 
29 | namespace UC {
30 | 
31 | class TaskSet : public HashSet<size_t> {};
32 | 
33 | } // namespace UC
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/600-new-model.yml:
--------------------------------------------------------------------------------
 1 | name: 🤗 Support request for new model supported from huggingface/modelscope/modelers on ucm
 2 | description: Submit a proposal/request for a new model from huggingface/modelscope/modelers on ucm
 3 | title: "[New Model]: "
 4 | labels: ["new model"]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc).
11 | 
12 |       #### We also highly recommend you read our official website first to know which models are already supported.
13 | - type: textarea
14 |   attributes:
15 |     label: The model to consider.
16 |     description: >
17 |       A huggingface/modelscope/modelers url, pointing to the model, e.g. https://huggingface.co/openai-community/gpt2 .
18 |   validations:
19 |     required: true
20 | - type: textarea
21 |   attributes:
22 |     label: The closest model ucm already supports.
23 |     description: >
24 |       Here is the list of models already supported by unifiedcache on our official website. Which model is the most similar to the model you want to add support for?
25 | - type: textarea
26 |   attributes:
27 |     label: What's your difficulty of supporting the model you want?
28 |     description: >
29 |       For example, any new operators or new architecture?
30 | - type: markdown
31 |   attributes:
32 |     value: >
33 |       Thanks for contributing 🎉!
34 | 


--------------------------------------------------------------------------------
/test/common/doc/LLMPerf.md:
--------------------------------------------------------------------------------
 1 | # 📝 LLM 性能测试使用说明
 2 | 
 3 | ## 🔧 功能概述  
 4 | 本测试用于评估 LLM 推理服务在不同负载下的性能表现，涵盖延迟、吞吐量、请求成功率等关键指标。
 5 | 
 6 | ## 📌 测试参数说明
 7 | 
 8 | | 参数 | 说明 | 示例 |
 9 | |------|------|------|
10 | | `mean_input_tokens` | 平均输入 token 数 | `[2000, 3000]` |
11 | | `mean_output_tokens` | 平均输出 token 数 | `[200, 500]` |
12 | | `max_num_completed_requests` | 最大完成请求数 | `[8, 4]` |
13 | | `concurrent_requests` | 并发请求数 | `[8, 4]` |
14 | | `additional_sampling_params` | 额外采样参数（如 temperature） | `["{}", "{}"]` |
15 | | `hit_rate` | 缓存命中率 | `[0, 50]` |
16 | 
17 | > ✅ 支持多组参数组合运行，自动执行多轮推理并收集统计结果。
18 | 
19 | ## 📊 输出结果
20 | 
21 | 测试完成后，将输出以下性能指标的统计值（每轮结果均记录）：
22 | 
23 | - **延迟指标**：  
24 |   - `inter_token_latency_s`（token 间延迟）  
25 |   - `ttft_s`（首个 token 延迟）  
26 |   - `end_to_end_latency_s`（端到端延迟）  
27 |   - 各项包含：P50、P90、P99、平均值
28 | 
29 | - **吞吐量指标**：  
30 |   - `total_throughput`（总吞吐量）  
31 |   - `incremental_throughput`（增量吞吐量）
32 | 
33 | - **其他指标**：  
34 |   - `num_completed_requests`（完成请求数）  
35 |   - `elapsed_time`（总耗时）  
36 |   - `incremental_time_delay`（增量时间延迟）
37 | 
38 | ## ✅ 验证规则
39 | 
40 | - 所有数值必须 > 0
41 | - 若出现 `None` 或 ≤ 0 的值，测试将标记为失败，并输出异常详情
42 | 
43 | ## 📤 输出格式
44 | 
45 | 返回一个字典，包含：
46 | ```python
47 | {
48 |     "_name": "llmperf",
49 |     "_data": {  # 所有指标的列表
50 |         "results_inter_token_latency_s_quantiles_p50": [...],
51 |         "results_ttft_s_mean": [...],
52 |         # ...
53 |     }
54 | }
55 | ```
56 | 
57 | ## 🚀 使用方式（在 test/ 目录下运行）
58 | ```bash
59 | # 按文件运行
60 | pytest test_uc_performance.py
61 | 
62 | # 按阶段运行
63 | pytest --stage=0
64 | 
65 | # 按特性运行
66 | pytest --feature=uc_performance_test
67 | ```
68 | > ⚠️ 确保已安装依赖：`pytest` 等模块。


--------------------------------------------------------------------------------
/ucm/store/detail/task/task_queue.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TASK_QUEUE_H
25 | #define UNIFIEDCACHE_TASK_QUEUE_H
26 | 
27 | #include "task_shard.h"
28 | 
29 | namespace UC {
30 | 
31 | class TaskQueue {
32 | public:
33 |     virtual ~TaskQueue() = default;
34 |     virtual void Push(std::list<Task::Shard>& shards) noexcept = 0;
35 | };
36 | 
37 | } // namespace UC
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/docs/prepare_videomme.md:
--------------------------------------------------------------------------------
 1 | ## Prepare VideoMME Dataset
 2 | 
 3 | 
 4 | ### Step 1: Download VideoMME dataset from [huggingface](https://huggingface.co/datasets/lmms-lab/Video-MME)
 5 | ```bash
 6 | git clone https://huggingface.co/datasets/lmms-lab/Video-MME
 7 | ```
 8 | 
 9 | Denote the root directory of the downloaded VideoMME dataset as `videomme_root`; it should have the following structure:
10 | ```
11 | ${videomme_root}/
12 | ├── videomme/
13 | ├── subtitle.zip
14 | ├── videos_chunked_01.zip
15 | ├── videos_chunked_02.zip
16 | ├── ...
17 | └── videos_chunked_20.zip
18 | ```
19 | 
20 | 
21 | ### Step 2: Unzip everything
22 | ```bash
23 | cd ${videomme_root}
24 | unzip subtitle.zip
25 | for file in videos_chunked_*.zip; do
26 |     unzip "$file"
27 | done
28 | ```
29 | 
30 | 
31 | ### Step 3: Extract frames of all videos
32 | ```bash
33 | cd ${retake_root}
34 | python scripts/utils/frame_extraction.py \
35 | --videofile_tpl ${videomme_root}/data/'*.mp4' \
36 | --results_dir ${videomme_root}/video_25fps \
37 | --fps 25 \
38 | --num_workers 32
39 | ```
40 | 
41 | 
42 | ### Step 4: Build VideoMME dataset
43 | ```bash
44 | cd ${retake_root}
45 | python scripts/utils/build_videomme_dataset.py \
46 | --hf_qwen2vl7b_path ${PATH_TO_Qwen2_VL_7B_Instruct} \
47 | --hf_root ${videomme_root}
48 | ```
49 | Note that you can NOT modify the folder `${videomme_root}/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation files `video_mme.json` and `video_mme_subtitle.json`:
50 | ```
51 | retake_root/
52 | ├── dataset/
53 |     ├── video_mme/
54 |         ├── video_mme_subtitle.json
55 |         ├── video_mme.json
56 | ├── ...
57 | ```


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/docs/prepare_lvbench.md:
--------------------------------------------------------------------------------
 1 | ## Prepare LVBench Dataset
 2 | 
 3 | 
 4 | ### Step 1: Download LVBench data from [huggingface](https://huggingface.co/datasets/THUDM/LVBench/tree/main)
 5 | ```bash
 6 | git clone https://huggingface.co/datasets/THUDM/LVBench # Contain annotations only
 7 | git clone https://huggingface.co/datasets/AIWinter/LVBench # Contain videos only
 8 | ```
 9 | Move all files in `AIWinter/LVBench` into `THUDM/LVBench`.
10 | 
11 | Denote the root directory of the downloaded LVBench dataset as `lvbench_root`; it should have the following structure:
12 | ```
13 | ${lvbench_root}/
14 | ├── docs/
15 | ├── video_info.meta.jsonl
16 | ├── all_videos_split.zip.001
17 | ├── all_videos_split.zip.002
18 | ├── ...
19 | └── all_videos_split.zip.014
20 | ```
21 | 
22 | 
23 | ### Step 2: Unzip everything
24 | ```bash
25 | cd ${lvbench_root}
26 | cat all_videos_split.zip.* > all_videos.zip
27 | unzip all_videos.zip
28 | ```
29 | 
30 | 
31 | ### Step 3: Extract frames of all videos
32 | ```bash
33 | cd ${retake_root}
34 | python scripts/utils/frame_extraction.py \
35 | --videofile_tpl ${lvbench_root}/all_videos/'*.mp4' \
36 | --results_dir ${lvbench_root}/video_25fps \
37 | --fps 25 \
38 | --num_workers 32
39 | ```
40 | 
41 | 
42 | ### Step 4: Build LVBench dataset
43 | ```bash
44 | cd ${retake_root}
45 | python scripts/utils/build_lvbench_dataset.py --hf_root ${lvbench_root}
46 | ```
47 | Note that you can NOT modify the folder `${lvbench_root}/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation file `lvbench.json`:
48 | ```
49 | retake_root/
50 | ├── dataset/
51 |     ├── lvbench/
52 |         ├── lvbench.json
53 | ├── ...
54 | ```


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/configs/kvcomp_deepseek_v2_lite_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "model_name": "DeepSeek/DeepSeek-V2-Lite-Chat",
 3 |     "is_mla": true,
 4 |     "hash_weight_type": "random",
 5 |     "num_hidden_layers": 27,
 6 |     "seq_len_threshhold": 2048,
 7 |     "chunk_size": 128,
 8 |     "chunk_repre_method": "max",
 9 |     "head_dim": 576,
10 |     "hash_bits": 128,
11 |     "top_k_ratio_per_layer": [
12 |         1,
13 |         1,
14 |         0.3,
15 |         0.3,
16 |         0.3,
17 |         0.3,
18 |         0.3,
19 |         0.3,
20 |         0.3,
21 |         0.3,
22 |         0.3,
23 |         0.3,
24 |         0.3,
25 |         0.3,
26 |         0.3,
27 |         0.3,
28 |         0.3,
29 |         0.3,
30 |         0.3,
31 |         0.3,
32 |         0.3,
33 |         0.3,
34 |         0.3,
35 |         0.3,
36 |         1,
37 |         1,
38 |         1
39 |     ],
40 |     "top_k_index_reuse": [
41 |         -1,
42 |         -1,
43 |         -1,
44 |         -1,
45 |         -1,
46 |         -1,
47 |         -1,
48 |         -1,
49 |         -1,
50 |         -1,
51 |         -1,
52 |         -1,
53 |         -1,
54 |         -1,
55 |         -1,
56 |         -1,
57 |         -1,
58 |         -1,
59 |         -1,
60 |         -1,
61 |         -1,
62 |         -1,
63 |         -1,
64 |         -1,
65 |         -1,
66 |         -1,
67 |         -1
68 |     ],
69 |     "must_select_blocks": [
70 |         0,
71 |         -2,
72 |         -1
73 |     ],
74 |     "hash_weight": null,
75 |     "kv_lora_rank": 512,
76 |     "qk_rope_head_dim": 64,
77 |     "hash_bits_kv_lora": 512,
78 |     "hash_bits_qk_rope": 64,
79 |     "hash_weight_kv_lora": null,
80 |     "hash_weight_qk_rope": null
81 | }


--------------------------------------------------------------------------------
/test/suites/E2E/test_evaluator.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | 
 3 | import pytest
 4 | from common.capture_utils import export_vars
 5 | from common.config_utils import config_utils as config_instance
 6 | from common.uc_eval.task import DocQaEvalTask
 7 | from common.uc_eval.utils.data_class import EvalConfig, ModelConfig
 8 | 
 9 | 
@pytest.fixture(scope="session")
def model_config() -> ModelConfig:
    """Build the session-scoped ``ModelConfig`` from the "models" config section.

    Only entries whose key names a ``ModelConfig`` dataclass field and whose
    value is not ``None`` are forwarded to the constructor. A falsy section
    (for example a missing one) is treated as an empty mapping.
    """
    section = config_instance.get_config("models") or {}
    known_fields = [f.name for f in dataclasses.fields(ModelConfig)]
    selected = {}
    for key, value in section.items():
        if value is None or key not in known_fields:
            continue
        selected[key] = value
    return ModelConfig(**selected)
16 | 
17 | 
# Parametrized scenarios for the doc-QA evaluation test. Each entry pairs an
# EvalConfig with the pytest id used to label the run.
_doc_qa_demo_eval = EvalConfig(
    data_type="doc_qa",
    dataset_file_path="common/uc_eval/datasets/doc_qa/demo.jsonl",
    enable_prefix_cache=False,
    parallel_num=1,
    benchmark_mode="evaluate",
    metrics=["accuracy", "bootstrap-accuracy", "f1-score"],
    eval_class="common.uc_eval.utils.metric:Includes",
)

doc_qa_eval_cases = [
    pytest.param(_doc_qa_demo_eval, id="doc-qa-complete-recalculate-evaluate"),
]
32 | 
33 | 
@pytest.mark.feature("eval_test")
@pytest.mark.stage(2)
@pytest.mark.parametrize("eval_config", doc_qa_eval_cases)
@export_vars
def test_doc_qa_perf(
    eval_config: EvalConfig, model_config: ModelConfig, request: pytest.FixtureRequest
):
    """Run one doc-QA evaluation case and return its result for export.

    The report directory is read from the "reports" config section
    (``base_dir``). The returned dict carries the parametrization id under
    ``_name`` and whatever ``DocQaEvalTask.run()`` produced under ``_data``.
    """
    reports_cfg = config_instance.get_config("reports")
    report_dir = reports_cfg.get("base_dir")
    eval_task = DocQaEvalTask(model_config, eval_config, report_dir)
    outcome = eval_task.run()
    return {"_name": request.node.callspec.id, "_data": outcome}
45 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/device.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_DEVICE_H
25 | #define UNIFIEDCACHE_TRANS_DEVICE_H
26 | 
27 | #include "buffer.h"
28 | #include "stream.h"
29 | 
30 | namespace UC::Trans {
31 | 
32 | class Device {
33 | public:
34 |     Status Setup(int32_t deviceId);
35 |     std::unique_ptr<Stream> MakeStream();
36 |     std::unique_ptr<Stream> MakeSMStream();
37 |     std::unique_ptr<Buffer> MakeBuffer();
38 | };
39 | 
40 | } // namespace UC::Trans
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/ucm/shared/infra/time/now_time.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_SHARED_INFRA_TIME_NOW_TIME_H
25 | #define UNIFIEDCACHE_SHARED_INFRA_TIME_NOW_TIME_H
26 | 
27 | #include <chrono>
28 | 
29 | namespace UC {
30 | 
/** Helper exposing the current std::chrono::steady_clock reading as a number. */
class NowTime {
public:
    /**
     * @return The time elapsed since the steady_clock epoch, expressed as a
     *         double count of seconds.
     */
    static auto Now()
    {
        using Seconds = std::chrono::duration<double>;
        const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
        return std::chrono::duration_cast<Seconds>(sinceEpoch).count();
    }
};
39 | 
40 | }  // namespace UC
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/cuda/cuda_buffer.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_CUDA_BUFFER_H
25 | #define UNIFIEDCACHE_TRANS_CUDA_BUFFER_H
26 | 
27 | #include "trans/detail/reserved_buffer.h"
28 | 
29 | namespace UC::Trans {
30 | 
31 | class CudaBuffer : public ReservedBuffer {
32 | public:
33 |     std::shared_ptr<void> MakeDeviceBuffer(size_t size) override;
34 |     std::shared_ptr<void> MakeHostBuffer(size_t size) override;
35 | };
36 | 
37 | } // namespace UC::Trans
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/simu/simu_buffer.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_SIMU_BUFFER_H
25 | #define UNIFIEDCACHE_TRANS_SIMU_BUFFER_H
26 | 
27 | #include "trans/detail/reserved_buffer.h"
28 | 
29 | namespace UC::Trans {
30 | 
31 | class SimuBuffer : public ReservedBuffer {
32 | public:
33 |     std::shared_ptr<void> MakeDeviceBuffer(size_t size) override;
34 |     std::shared_ptr<void> MakeHostBuffer(size_t size) override;
35 | };
36 | 
37 | } // namespace UC::Trans
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/ascend/ascend_buffer.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_ASCEND_BUFFER_H
25 | #define UNIFIEDCACHE_TRANS_ASCEND_BUFFER_H
26 | 
27 | #include "trans/detail/reserved_buffer.h"
28 | 
29 | namespace UC::Trans {
30 | 
31 | class AscendBuffer : public ReservedBuffer {
32 | public:
33 |     std::shared_ptr<void> MakeDeviceBuffer(size_t size) override;
34 |     std::shared_ptr<void> MakeHostBuffer(size_t size) override;
35 | };
36 | 
37 | } // namespace UC::Trans
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.18)
 2 | project(unified-cache-management VERSION 1.0.0 LANGUAGES CXX)
 3 | 
 4 | set(CMAKE_CXX_STANDARD 17)
 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 6 | set(CMAKE_CXX_EXTENSIONS OFF)
 7 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 8 | 
 9 | option(BUILD_UCM_STORE "build ucm store module." ON)
10 | option(BUILD_UCM_SPARSE "build ucm sparse module." OFF)
11 | option(BUILD_UNIT_TESTS "build all unit test suits." OFF)
12 | option(BUILD_NUMA "build numactl library." OFF)
13 | option(DOWNLOAD_DEPENDENCE "download dependence by cmake." ON)
14 | set(RUNTIME_ENVIRONMENT "simu" CACHE STRING "runtime: simu, ascend, musa or cuda.")
15 | set(LOGGER_BACKEND "spdlog" CACHE STRING "backend: spdlog or flux.")
16 | 
17 | execute_process(COMMAND git rev-parse HEAD OUTPUT_VARIABLE UCM_COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE)
18 | add_compile_definitions(UCM_PROJECT_NAME="${PROJECT_NAME}")
19 | add_compile_definitions(UCM_PROJECT_VERSION="${PROJECT_VERSION}")
20 | add_compile_definitions(UCM_COMMIT_ID="${UCM_COMMIT_ID}")
21 | add_compile_definitions(UCM_BUILD_TYPE="${CMAKE_BUILD_TYPE}")
22 | 
23 | set(CMAKE_SKIP_RPATH TRUE)
24 | set(FLAGS_PUBLIC "-Wall -Werror -fPIC -Wl,-z,relro,-z,now")
25 | set(FLAGS_DEBUG "-O0 -g")
26 | set(FLAGS_RELEASE "-O3 -D_FORTIFY_SOURCE=2")
27 | string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER)
28 | if(CMAKE_BUILD_TYPE_LOWER STREQUAL "debug")
29 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_PUBLIC} ${FLAGS_DEBUG}")
30 | else()
31 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS_PUBLIC} ${FLAGS_RELEASE}")
32 | endif()
33 | if(BUILD_UNIT_TESTS)
34 |     enable_testing()
35 | endif()
36 | 
37 | add_subdirectory(ucm)
38 | if(BUILD_UNIT_TESTS)
39 |     add_subdirectory(test)
40 | endif()
41 | 


--------------------------------------------------------------------------------
/ucm/shared/infra/template/singleton.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_INFRA_SINGLETON_H
25 | #define UNIFIEDCACHE_INFRA_SINGLETON_H
26 | 
27 | namespace UC {
28 | 
/**
 * @brief Access point for one shared, lazily constructed instance of T.
 *
 * The instance is a function-local static inside Instance(): it is
 * default-constructed on the first call and the same address is returned on
 * every later call. Singleton itself cannot be copied, and its default
 * constructor is private.
 *
 * @tparam T type of the shared object; must be default-constructible from
 *           within Instance().
 */
template <typename T>
class Singleton {
    Singleton() = default;

public:
    Singleton(const Singleton&) = delete;
    Singleton& operator=(const Singleton&) = delete;

    /// @return pointer to the shared T; never null.
    static T* Instance()
    {
        static T instance;
        return &instance;
    }
};
43 | 
44 | } // namespace UC
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/hotness/hotness_set.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | 
25 | #ifndef UNIFIEDCACHE_HOTNESS_SET_H
26 | #define UNIFIEDCACHE_HOTNESS_SET_H
27 | 
28 | #include <unordered_set>
29 | #include <mutex>
30 | #include "space/space_layout.h"
31 | 
32 | namespace UC {
33 | 
34 | class HotnessSet {
35 | public:
36 |     void Insert(const std::string& blockId);
37 |     void UpdateHotness(const SpaceLayout* spaceLayout);
38 | 
39 | private:
40 |     std::mutex mutex_;
41 |     std::unordered_set<std::string> pendingBlocks_;
42 | };
43 | 
44 | 
45 | } // namespace UC
46 | 
47 | #endif


--------------------------------------------------------------------------------
/ucm/sparse/gsa/prefetch/src/pybinds.cpp:
--------------------------------------------------------------------------------
 1 | #pragma GCC diagnostic push
 2 | #include <pybind11/functional.h>
 3 | #include <pybind11/pybind11.h>
 4 | #include <pybind11/stl.h>
 5 | #include <torch/extension.h>
 6 | #pragma GCC diagnostic pop
 7 | #include "kvcache_pre.h"
 8 | 
 9 | namespace ucmprefetch {
10 | PYBIND11_MODULE(gsa_prefetch, m)
11 | {
12 |     pybind11::class_<ucmprefetch::GSAPrefetchEngineC>(m, "GSAPrefetchEngineC")
13 |         .def(pybind11::init<torch::Tensor&, torch::Tensor&, torch::Tensor&, torch::Tensor&,
14 |                             std::vector<uint32_t>&, bool, bool, int, int, int, bool>())
15 |         .def("set_blocks_map", &ucmprefetch::GSAPrefetchEngineC::SetBlocksMap)
16 |         .def("set_blocks_map_multilayer", &ucmprefetch::GSAPrefetchEngineC::SetBlocksMapMultiLayer)
17 |         .def("add_blocks_map", &ucmprefetch::GSAPrefetchEngineC::AddBlocksMap)
18 |         .def("del_blocks_map", &ucmprefetch::GSAPrefetchEngineC::DelBlocksMap)
19 |         .def("run_async_prefetch_bs", &ucmprefetch::GSAPrefetchEngineC::RunAsyncPrefetchBs)
20 |         .def("set_blocks_table_info", &ucmprefetch::GSAPrefetchEngineC::SetBlockTableInfo)
21 |         .def("get_prefetch_status", &ucmprefetch::GSAPrefetchEngineC::GetPrefetchStatus)
22 |         .def("set_prefetch_status", &ucmprefetch::GSAPrefetchEngineC::SetPrefetchStatus)
23 |         .def("set_modelrunning_status", &ucmprefetch::GSAPrefetchEngineC::SetModelRunningStatus)
24 |         .def("obtain_load_blocks", &ucmprefetch::GSAPrefetchEngineC::ObtainLoadBlocks)
25 |         .def("obtain_miss_idxs", &ucmprefetch::GSAPrefetchEngineC::ObtainMissIdxs)
26 |         .def("obtain_docs_map", &ucmprefetch::GSAPrefetchEngineC::ObtainDocsMap)
27 |         .def("obtain_blocks_map", &ucmprefetch::GSAPrefetchEngineC::ObtainBlocksMap);
28 | }
29 | } // namespace ucmprefetch
30 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/scripts/submission/prepare_videomme_submission.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import pandas as pd
 5 | 
 6 | predict_result_dir = "results/path_to_results"
 7 | output_file = "./ReTaKe_videomme_submission.json"
 8 | 
 9 | 
10 | videomme_hf_root = "/Video-MME/origin_data"
11 | data_root = "./dataset"
12 | 
13 | 
14 | ################ DO NOT CHANGE ################
15 | annos = pd.read_parquet(
16 |     os.path.join(videomme_hf_root, "videomme", "test-00000-of-00001.parquet")
17 | )
18 | with open(os.path.join(predict_result_dir, "generated_predictions.jsonl"), "r") as f:
19 |     responses = [json.loads(line) for line in f.readlines()]
20 | 
21 | video_id2results = {}
22 | for idx, row in annos.iterrows():
23 |     video_id = row["video_id"]
24 |     if video_id in video_id2results:
25 |         video_results = video_id2results[video_id]
26 |     else:
27 |         video_results = dict(
28 |             video_id=video_id,
29 |             duration=row["duration"],
30 |             domain=row["domain"],
31 |             sub_category=row["sub_category"],
32 |         )
33 |     questions = video_results.get("questions", [])
34 |     questions.append(
35 |         dict(
36 |             question_id=row["question_id"],
37 |             task_type=row["task_type"],
38 |             question=row["question"],
39 |             options=row["options"].tolist(),
40 |             answer=row["answer"],
41 |             response=responses[idx]["predict"],
42 |         )
43 |     )
44 |     video_results["questions"] = questions
45 |     video_id2results[video_id] = video_results
46 | 
47 | submission_results = []
48 | for video_results in video_id2results.values():
49 |     submission_results.append(video_results)
50 | 
51 | 
52 | with open(output_file, "w") as f:
53 |     json.dump(submission_results, f, indent=2)
54 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/docs/prepare_longvideobench.md:
--------------------------------------------------------------------------------
 1 | ## Prepare LongVideoBench Dataset
 2 | 
 3 | 
 4 | ### Step 1: Download LongVideoBench dataset from [huggingface](https://huggingface.co/datasets/longvideobench/LongVideoBench)
 5 | ```bash
 6 | git clone https://huggingface.co/datasets/longvideobench/LongVideoBench
 7 | ```
 8 | 
 9 | Denote the root directory of the downloaded LongVideoBench dataset as `longvideobench_root`; it should have the following structure:
10 | ```
11 | ${longvideobench_root}/
12 | ├── subtitles.zip
13 | ├── test-00000-of-00001.parquet
14 | ├── validation-00000-of-00001.parquet
15 | ├── videos.tar.part.aa
16 | ├── ...
17 | └── videos.tar.part.be
18 | ├── ...
19 | ```
20 | 
21 | 
22 | ### Step 2: Unzip everything
23 | ```bash
24 | cd ${longvideobench_root}
25 | tar -xvf subtitles.tar
26 | cat videos.tar.part.* > videos.tar
27 | tar -xvf videos.tar
28 | ```
29 | 
30 | 
31 | ### Step 3: Extract frames of all videos
32 | ```bash
33 | cd ${retake_root}
34 | python scripts/utils/frame_extraction.py \
35 | --videofile_tpl ${longvideobench_root}/videos/'*.mp4' \
36 | --results_dir ${longvideobench_root}/video_25fps \
37 | --fps 25 \
38 | --num_workers 32
39 | ```
40 | 
41 | 
42 | ### Step 4: Build LongVideoBench dataset
43 | ```bash
44 | cd ${retake_root}
45 | python scripts/utils/build_longvideobench_dataset.py \
46 | --hf_root ${longvideobench_root} \
47 | --hf_qwen2vl7b_path ${PATH_TO_Qwen2_VL_7B_Instruct}
48 | ```
49 | Note that you can NOT modify the folder `${longvideobench_root}/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation files `longvideobench_val.json` and `longvideobench_test.json`:
50 | ```
51 | retake_root/
52 | ├── dataset/
53 |     ├── longvideobench/
54 |         ├── longvideobench_val.json
55 |         ├── longvideobench_test.json
56 | ├── ...
57 | ```


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/about-codeowners/
 2 | # for more info about CODEOWNERS file
 3 | 
 4 | * @mag1c-h @ygwpz @FangRun2 @Tarrei
 5 | /.github @Wwwzff @hek14 @ygwpz @mag1c-h @FangRun2 @Tarrei
 6 | 
 7 | /ucm/sparse @wuhuxiao @wangwenxin0312 @hek14 @ygwpz @mag1c-h
 8 | /ucm/sparse/cache_blend @wuhuxiao @hek14 @ygwpz @mag1c-h
 9 | /ucm/sparse/esa @wangwenxin0312 @hek14 @ygwpz @mag1c-h
10 | /ucm/sparse/gsa @Zbm1996 @zbb200819 @yxkyong @HaoLi980405  @wuhuxiao @hek14 @ygwpz @mag1c-h
11 | /ucm/sparse/kvcomp @leideng @pengwwang @wuhuxiao @hek14 @ygwpz @mag1c-h
12 | /ucm/sparse/kvstar @saki-daisuki @summer-ai007 @xwLearnsLLM @wuhuxiao @hek14 @ygwpz @mag1c-h
13 | 
14 | /ucm/store @mag1c-h @ygwpz
15 | /ucm/store/dramstore @harrisonyhq @mag1c-h @ygwpz
16 | /ucm/store/localstore @mag1c-h @ygwpz
17 | /ucm/store/mooncakestore @chinesezyc @mag1c-h @ygwpz
18 | /ucm/store/nfsstore @mag1c-h @ygwpz
19 | 
20 | /ucm/integration @qyh111 @harrisonyhq @ygwpz @mag1c-h @hek14
21 | 
22 | /ucm/pd @flesher0813 @ygwpz @mag1c-h
23 | 
24 | /ucm/sandbox @Wwwzff @hek14 @ygwpz @mag1c-h @FangRun2 @Tarrei
25 | 
26 | /benchmarks @flesher0813 @ygwpz @mag1c-h
27 | 
28 | /docker @harrisonyhq @ygwpz @mag1c-h
29 | 
30 | /docs @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei @hek14
31 | /docs/source/user-guide/sparse-attention/esa.md @wangwenxin0312 @hek14 @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei
32 | /docs/source/user-guide/sparse-attention/gsa.md @Zbm1996 @zbb200819 @yxkyong @HaoLi980405 @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei
33 | /docs/source/user-guide/sparse-attention/kvcomp.md @leideng @pengwwang @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei
34 | /docs/source/user-guide/sparse-attention/kvstar.md @saki-daisuki @summer-ai007 @flesher0813 @ygwpz @mag1c-h @FangRun2 @Tarrei
35 | 
36 | /examples @harrisonyhq @ygwpz @mag1c-h @hek14
37 | 
38 | /test @Wwwzff @ygwpz @mag1c-h
39 | 


--------------------------------------------------------------------------------
/ucm/shared/metrics/cc/stats/istats.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_ISTATS_H
25 | #define UNIFIEDCACHE_ISTATS_H
26 | 
27 | #include <memory>
28 | #include <string>
29 | #include <unordered_map>
30 | #include <vector>
31 | 
32 | namespace UC::Metrics {
33 | 
/**
 * @brief Abstract interface of a named statistics collector.
 *
 * Implementations receive named double samples through Update() and expose
 * per-key series of doubles through Data().
 */
class IStats {
public:
    virtual ~IStats() = default;

    /// @return the name of this statistics object.
    virtual std::string Name() const = 0;

    /// Feeds a batch of named values into the collector.
    /// @param params mapping from a key to a single double value.
    virtual void Update(const std::unordered_map<std::string, double>& params) = 0;

    /// Resets the collector; the exact post-state is implementation-defined.
    virtual void Reset() = 0;

    /// @return a by-value mapping from key to a vector of doubles.
    virtual std::unordered_map<std::string, std::vector<double>> Data() = 0;
};
42 | 
43 | } // namespace UC::Metrics
44 | 
45 | #endif


--------------------------------------------------------------------------------
/ucm/store/detail/task/task_waiter.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_ITASK_WAITER_H
25 | #define UNIFIEDCACHE_ITASK_WAITER_H
26 | 
27 | #include "thread/latch.h"
28 | 
29 | namespace UC {
30 | 
31 | class TaskWaiter : public Latch {
32 | public:
33 |     TaskWaiter(const size_t expected, const double startTp) : Latch{}
34 |     {
35 |         this->startTp = startTp;
36 |         Set(expected);
37 |     }
38 |     using Latch::Wait;
39 |     virtual bool Wait(const size_t timeoutMs) noexcept { return WaitFor(timeoutMs); }
40 |     virtual bool Finish() noexcept { return Check(); }
41 | };
42 | 
43 | }  // namespace UC
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/300-usage.yml:
--------------------------------------------------------------------------------
 1 | name: 💻 Usage
 2 | description: Raise an issue here if you don't know how to use ucm.
 3 | title: "[Usage]: "
 4 | labels: ["usage"]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc).
11 | - type: textarea
12 |   attributes:
13 |     label: Your current environment
14 |     description: |
15 |       Please run the following and paste the output below.
16 |       **TODO: Add a script to our project to collect the unifiedcache runtime environment; the following example comes from vllm-ascend**
17 |       ```sh
18 |       npu-smi info
19 |       cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
20 |       wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
21 |       # For security purposes, please feel free to check the contents of collect_env.py before running it.
22 |       python collect_env.py
23 |       ```
24 |       It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues.
25 |     value: |
26 |       ```text
27 |       The output of above commands
28 |       ```
29 |   validations:
30 |     required: true
31 | - type: textarea
32 |   attributes:
33 |     label: How would you like to use ucm.
34 |     description: |
35 |       A detailed description of how you want to use unifiedcache.
36 |     value: |
37 |       I want to run inference of a [specific model](put link here). I don't know how to integrate it with unifiedcache.
38 | - type: markdown
39 |   attributes:
40 |     value: >
41 |       Thanks for contributing 🎉!
42 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/200-installation.yml:
--------------------------------------------------------------------------------
 1 | name: 🛠️ Installation
 2 | description: Report an issue here when you hit errors during installation.
 3 | title: "[Installation]: "
 4 | labels: ["installation"]
 5 | 
 6 | body:
 7 | - type: markdown
 8 |   attributes:
 9 |     value: >
10 |       #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/ModelEngine-Group/unified-cache-management/issues?q=is%3Aissue%20sort%3Acreated-desc).
11 | - type: textarea
12 |   attributes:
13 |     label: Your current environment
14 |     description: |
15 |       Please run the following and paste the output below.
16 |       **TODO: Add a script to our project to collect the unifiedcache runtime environment; the following example comes from vllm-ascend**
17 |       ```sh
18 |       npu-smi info
19 |       cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
20 |       wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
21 |       # For security purposes, please feel free to check the contents of collect_env.py before running it.
22 |       python collect_env.py
23 |       ```
24 |       It is suggested to download and execute the latest script, as vllm might frequently update the diagnosis information needed for accurately and quickly responding to issues.
25 |     value: |
26 |       ```text
27 |       The output of `python collect_env.py`
28 |       ```
29 |   validations:
30 |     required: true
31 | - type: textarea
32 |   attributes:
33 |     label: How you are installing ucm, also vllm and vllm-ascend.
34 |     description: |
35 |       Paste the full command you are trying to execute.
36 |     value: |
37 |       ```sh
38 |       pip install -vvv unifiedcache
39 |       ```
40 | - type: markdown
41 |   attributes:
42 |     value: >
43 |       Thanks for contributing 🎉!
44 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/space/space_shard_temp_layout.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_SPACE_SHARD_TEMP_LAYOUT_H
25 | #define UNIFIEDCACHE_SPACE_SHARD_TEMP_LAYOUT_H
26 | 
27 | #include "space_shard_layout.h"
28 | 
29 | namespace UC {
30 | 
31 | class SpaceShardTempLayout : public SpaceShardLayout {
32 | public:
33 |     std::string DataFileParent(const std::string& blockId, bool activated) const override;
34 |     std::string DataFilePath(const std::string& blockId, bool activated) const override;
35 | 
36 | protected:
37 |     std::vector<std::string> RelativeRoots() const override;
38 |     virtual std::string TempDataFileRoot() const;
39 | };
40 | 
41 | } // namespace UC
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/cuda/cuda_sm_kernel.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_CUDA_SM_KERNEL_H
25 | #define UNIFIEDCACHE_TRANS_CUDA_SM_KERNEL_H
26 | 
27 | #include <cstddef>
28 | #include <cuda_runtime.h>
29 | 
30 | namespace UC::Trans {
31 | 
32 | cudaError_t CudaSMCopyAsync(void* src[], void* dst[], size_t size, size_t number,
33 |                             cudaStream_t stream);
34 | cudaError_t CudaSMCopyAsync(void* src[], void* dst, size_t size, size_t number,
35 |                             cudaStream_t stream);
36 | cudaError_t CudaSMCopyAsync(void* src, void* dst[], size_t size, size_t number,
37 |                             cudaStream_t stream);
38 | 
39 | } // namespace UC::Trans
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/cuda/cuda_sm_stream.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_CUDA_SM_STREAM_H
25 | #define UNIFIEDCACHE_TRANS_CUDA_SM_STREAM_H
26 | 
27 | #include "cuda_stream.h"
28 | 
29 | namespace UC::Trans {
30 | 
31 | class CudaSmStream : public CudaStream {
32 | public:
33 |     Status DeviceToHostAsync(void* device[], void* host[], size_t size, size_t number) override;
34 |     Status DeviceToHostAsync(void* device[], void* host, size_t size, size_t number) override;
35 |     Status HostToDeviceAsync(void* host[], void* device[], size_t size, size_t number) override;
36 |     Status HostToDeviceAsync(void* host, void* device[], size_t size, size_t number) override;
37 | };
38 | 
39 | } // namespace UC::Trans
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/ucm/store/pcstore/cc/domain/space/space_manager.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_SPACE_MANAGER_H
25 | #define UNIFIEDCACHE_SPACE_MANAGER_H
26 | 
27 | #include "space_layout.h"
28 | 
29 | namespace UC {
30 | 
31 | class SpaceManager {
32 | public:
33 |     Status Setup(const std::vector<std::string>& storageBackends, const size_t blockSize);
34 |     Status NewBlock(const std::string& blockId);
35 |     Status CommitBlock(const std::string& blockId, bool success);
36 |     bool LookupBlock(const std::string& blockId) const;
37 |     const SpaceLayout* GetSpaceLayout() const { return &this->layout_; }
38 | 
39 | private:
40 |     SpaceLayout layout_;
41 |     size_t blockSize_;
42 | };
43 | 
44 | } // namespace UC
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/offload_ops/src/k_repre.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdexcept>
 2 | #include <cassert>
 3 | #include "k_repre.h"
 4 | 
 5 | 
 6 | namespace KRepre {
 7 | #define OMP_THREAD_NUM 32u
 8 | 
 9 | const VecProductClass& KRepreComputer::ThreadLocalVecProduct::GetInstance() // returns the calling thread's own VecProductClass
10 | {
11 |     thread_local static VecProductClass instance; // thread_local: one instance per thread, reused on later calls
12 |     return instance;
13 | }
14 | 
15 | void KRepreComputer::ComputeKRepreBlock(const float* __restrict kArray,
16 |                         uint32_t kHead,
17 |                         uint32_t blockSize,
18 |                         uint32_t headSize,
19 |                         float* __restrict kRepreBlock) const
20 | {
21 |     // Fetch this thread's VecProduct instance; each head's slice is handed to VectorMean below.
22 |     const auto& vecProduct = ThreadLocalVecProduct::GetInstance();
23 | 
24 |     for (uint32_t idxHead = 0; idxHead < kHead; ++idxHead) {
25 |         const float* kArraySingleHead = kArray + static_cast<std::size_t>(idxHead) * blockSize * headSize; // size_t math: no 32-bit wrap
26 |         float* kRepreBlockSingleHead = kRepreBlock + static_cast<std::size_t>(idxHead) * headSize; // this head's headSize-long output slot
27 | 
28 |         vecProduct.VectorMean(
29 |             kArraySingleHead,
30 |             kRepreBlockSingleHead,
31 |             headSize,
32 |             blockSize
33 |         );
34 |     }
35 | }
36 |     
37 | void KRepreComputer::ComputeKRepre(const std::vector<float*>& kArray,
38 |                    uint32_t numBlock,
39 |                    uint32_t kHead,
40 |                    uint32_t blockSize,
41 |                    uint32_t headSize,
42 |                    const std::vector<float*>& kRepreBlockArray) const
43 | {
44 | #pragma omp parallel for num_threads(OMP_THREAD_NUM)
45 |     for (uint32_t idxBlock = 0; idxBlock < numBlock; ++idxBlock) { // one block per iteration; iterations are split across OpenMP threads
46 |         const float* kArrayCurrentBlock = kArray[idxBlock]; // NOTE(review): numBlock is not checked against kArray.size() — confirm callers guarantee it
47 |         float * KRepreCurrentBlock = kRepreBlockArray[idxBlock]; // NOTE(review): same assumption for kRepreBlockArray.size()
48 | 
49 |         ComputeKRepreBlock(
50 |             kArrayCurrentBlock,
51 |             kHead,
52 |             blockSize,
53 |             headSize,
54 |             KRepreCurrentBlock
55 |         );
56 |     }
57 | }
58 | }


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/configs/kvcomp_qwen3_4B_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "model_name": "Qwen/Qwen3-4B",
 3 |     "is_mla": false,
 4 |     "hash_weight_type": "random",
 5 |     "num_hidden_layers": 36,
 6 |     "seq_len_threshhold": 2048,
 7 |     "chunk_size": 128,
 8 |     "chunk_repre_method": "max",
 9 |     "head_dim": 128,
10 |     "hash_bits": 128,
11 |     "top_k_ratio_per_layer": [
12 |         1,
13 |         1,
14 |         0.3,
15 |         0.3,
16 |         0.3,
17 |         0.3,
18 |         0.3,
19 |         0.3,
20 |         0.3,
21 |         0.3,
22 |         0.3,
23 |         0.3,
24 |         0.3,
25 |         0.3,
26 |         0.3,
27 |         0.3,
28 |         0.3,
29 |         0.3,
30 |         0.3,
31 |         0.3,
32 |         0.3,
33 |         0.3,
34 |         0.3,
35 |         0.3,
36 |         0.3,
37 |         0.3,
38 |         0.3,
39 |         0.3,
40 |         0.3,
41 |         0.3,
42 |         0.3,
43 |         0.3,
44 |         0.3,
45 |         1,
46 |         1,
47 |         1
48 |     ],
49 |     "top_k_index_reuse": [
50 |         -1,
51 |         -1,
52 |         -1,
53 |         -1,
54 |         -1,
55 |         -1,
56 |         -1,
57 |         -1,
58 |         -1,
59 |         -1,
60 |         -1,
61 |         -1,
62 |         -1,
63 |         -1,
64 |         -1,
65 |         -1,
66 |         -1,
67 |         -1,
68 |         -1,
69 |         -1,
70 |         -1,
71 |         -1,
72 |         -1,
73 |         -1,
74 |         -1,
75 |         -1,
76 |         -1,
77 |         -1,
78 |         -1,
79 |         -1,
80 |         -1,
81 |         -1,
82 |         -1,
83 |         -1,
84 |         -1,
85 |         -1
86 |     ],
87 |     "must_select_blocks": [
88 |         0,
89 |         -2,
90 |         -1
91 |     ],
92 |     "hash_weight": null,
93 |     "kv_lora_rank": null,
94 |     "qk_rope_head_dim": null,
95 |     "hash_bits_kv_lora": null,
96 |     "hash_bits_qk_rope": null,
97 |     "hash_weight_kv_lora": null,
98 |     "hash_weight_qk_rope": null
99 | }


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/space/space_property.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | 
25 | #ifndef UNIFIEDCACHE_SPACE_PROPERTY_H
26 | #define UNIFIEDCACHE_SPACE_PROPERTY_H
27 | 
28 | #include "file/ifile.h"
29 | #include "status/status.h"
30 | 
31 | namespace UC {
32 | 
33 | class SpaceProperty { // NOTE(review): destructor is user-declared and addr_ is a raw pointer, yet copy operations are implicit — confirm copying is safe
34 | public:
35 |     ~SpaceProperty();
36 |     Status Setup(const std::string& propertyFilePath);
37 |     void IncreaseCapacity(const size_t delta);
38 |     void DecreaseCapacity(const size_t delta);
39 |     size_t GetCapacity() const;
40 | 
41 | private:
42 |     Status InitShmProperty(IFile* shmPropertyFile);
43 |     Status LoadShmProperty(IFile* shmPropertyFile);
44 | 
45 | private:
46 |     void* addr_{nullptr}; // starts null; NOTE(review): presumably the mapped shared-memory property region — confirm in the .cc
47 | };
48 | 
49 | } // namespace UC
50 | 
51 | #endif


--------------------------------------------------------------------------------
/ucm/shared/infra/time/stopwatch.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_INFRA_STOPWATCH_H
25 | #define UNIFIEDCACHE_INFRA_STOPWATCH_H
26 | 
27 | #include <chrono>
28 | 
29 | namespace UC {
30 | 
31 | class StopWatch { // Measures time elapsed since construction or the latest Reset(), using steady_clock
32 |     using clock = std::chrono::steady_clock;
33 |     std::chrono::time_point<clock> startTp_; // reference point for Elapsed()/ElapsedMs()
34 | 
35 | public:
36 |     StopWatch() : startTp_{clock::now()} {} // timing starts at construction
37 |     std::chrono::duration<double> Elapsed() const // elapsed time as fractional seconds
38 |     {
39 |         return std::chrono::duration<double>(clock::now() - startTp_);
40 |     }
41 |     std::chrono::milliseconds ElapsedMs() const // elapsed time truncated to whole milliseconds
42 |     {
43 |         return std::chrono::duration_cast<std::chrono::milliseconds>(clock::now() - startTp_);
44 |     }
45 |     void Reset() { startTp_ = clock::now(); } // restart timing from now
46 | };
47 | 
48 | } // namespace UC
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | # -- Project information -----------------------------------------------------
 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 8 | 
 9 | project = "Unified Cache Manager"
10 | copyright = "2025, Unified Cache Manager Team"
11 | author = "Unified Cache Manager Team"
12 | release = ""  # no release string is set
13 | 
14 | # -- General configuration ---------------------------------------------------
15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
16 | 
17 | # Copied from https://github.com/vllm-project/vllm/blob/main/docs/source/conf.py
18 | extensions = [
19 |     "sphinx.ext.napoleon",
20 |     "sphinx.ext.intersphinx",
21 |     "sphinx_copybutton",
22 |     "sphinx.ext.autodoc",
23 |     "sphinx.ext.autosummary",
24 |     "myst_parser",
25 |     "sphinxarg.ext",
26 |     "sphinx_design",
27 |     "sphinx_togglebutton",
28 |     "sphinx_substitution_extensions",
29 | ]
30 | 
31 | myst_enable_extensions = ["colon_fence", "substitution"]
32 | 
33 | # templates_path = ['_templates']
34 | exclude_patterns = []
35 | 
36 | 
37 | # -- Options for HTML output -------------------------------------------------
38 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
39 | 
40 | html_title = project  # the HTML title reuses the project name defined above
41 | html_theme = "sphinx_book_theme"
42 | html_static_path = ["_static"]
43 | html_css_files = ["css/logo.css"]
44 | html_theme_options = {
45 |     "path_to_docs": "docs/source",
46 |     "repository_url": "https://github.com/ModelEngine-Group/unified-cache-management",
47 |     "use_repository_button": True,
48 |     "use_edit_page_button": True,
49 |     "logo": {
50 |         "image_light": "logos/UCM-light.png",
51 |         "image_dark": "logos/UCM-dark.png",
52 |         "alt_text": "UCM",
53 |     },
54 | }
55 | 
56 | # language = 'zh_CN'
57 | 


--------------------------------------------------------------------------------
/ucm/shared/metrics/test/test.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # MIT License
 4 | #
 5 | # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 6 | #
 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | # of this software and associated documentation files (the "Software"), to deal
 9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 | #
25 | 
26 | 
27 | import os
28 | import sys
29 | 
30 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  # NOTE(review): adds the parent of this file's directory, not the repo root that `ucm.*` needs — confirm
31 | from ucm.shared.metrics import ucmmonitor
32 | 
33 | # import monitor
34 | 
35 | mon = ucmmonitor.StatsMonitor.get_instance()
36 | mon.update_stats(  # first of two identical updates to the "ConnStats" entry
37 |     "ConnStats",
38 |     {
39 |         "save_duration": 1.2,
40 |         "save_speed": 300.5,
41 |         "load_duration": 0.8,
42 |         "load_speed": 450.0,
43 |         "interval_lookup_hit_rates": 0.95,
44 |     },
45 | )
46 | mon.update_stats(  # same payload again
47 |     "ConnStats",
48 |     {
49 |         "save_duration": 1.2,
50 |         "save_speed": 300.5,
51 |         "load_duration": 0.8,
52 |         "load_speed": 450.0,
53 |         "interval_lookup_hit_rates": 0.95,
54 |     },
55 | )
56 | 
57 | data = mon.get_stats("ConnStats")  # read the stats back; the result is only printed, not asserted
58 | print(data)
59 | 


--------------------------------------------------------------------------------
/ucm/sparse/utils.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | 
 5 | DEFAULT_BLOCK_SIZE = 128
 6 | MIN_TOPK_LEN = 32
 7 | MAX_TOPK_LEN = 48
 8 | MAX_BS = 256
 9 | SEG_PREFILL_THRESHOLD = 8400
10 | CUDA_TOPK = False
11 | PTOPK_PREFETCH_ENABLE = False
12 | VLLM_CUDA_MEM_ALIGN_KV_CACHE = False
13 | INIT_WINDOW_SZ = 1
14 | NUM_PREFETCH_BLOCKS = 1
15 | NUM_GSA_BLOCKS = 1
16 | 
17 | 
18 | class GSAConfig:
19 |     def __init__(self):
20 |         self.block_size = DEFAULT_BLOCK_SIZE
21 |         self.init_windows_size = INIT_WINDOW_SZ
22 |         self.num_prefetch_blocks = NUM_PREFETCH_BLOCKS
23 |         self.min_topk_len = MIN_TOPK_LEN
24 |         self.max_topk_len = MAX_TOPK_LEN
25 | 
26 |     def set_config(self, block_szie):
27 |         self.block_size = block_szie
28 |         self.min_topk_len = math.ceil(MIN_TOPK_LEN * DEFAULT_BLOCK_SIZE / block_szie)
29 |         self.max_topk_len = math.ceil(MAX_TOPK_LEN * DEFAULT_BLOCK_SIZE / block_szie)
30 |         self.num_prefetch_blocks = math.ceil(
31 |             NUM_PREFETCH_BLOCKS * DEFAULT_BLOCK_SIZE / block_szie
32 |         )
33 |         self.init_windows_size = math.ceil(
34 |             INIT_WINDOW_SZ * DEFAULT_BLOCK_SIZE / block_szie
35 |         )
36 |         self.num_gsa_blocks = math.ceil(
37 |             NUM_GSA_BLOCKS * DEFAULT_BLOCK_SIZE / block_szie
38 |         )
39 | 
40 |     def compute_topk_len(self, raw_seq_len):
41 |         topk_len = math.ceil(raw_seq_len * 0.3)
42 |         # topk_len = max(1, topk_len)
43 |         if topk_len < self.min_topk_len:
44 |             topk_len = min(self.min_topk_len, raw_seq_len)
45 |         elif topk_len > self.max_topk_len:
46 |             topk_len = self.max_topk_len
47 |         return topk_len
48 | 
49 | 
50 | gsa_config = GSAConfig()
51 | 
52 | 
53 | def round_up(x: int, y: int) -> int:  # round x up to a multiple of y via floor division (smallest such multiple >= x for positive y)
54 |     return ((x + y - 1) // y) * y
55 | 
56 | 
57 | def get_type_size(dtype: torch.dtype) -> int:  # bytes per element of `dtype`
58 |     return torch.tensor([], dtype=dtype).element_size()  # an empty tensor is enough to query element_size()
59 | 
60 | 
61 | def align_to_256bytes(extent: int, dtype: torch.dtype) -> int:
62 |     dtype_szie = get_type_size(dtype)
63 |     eles_per_256bytes = 256 // dtype_szie
64 |     return round_up(extent, eles_per_256bytes)
65 | 


--------------------------------------------------------------------------------
/ucm/shared/vendor/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | function(EnableDept) # fetch dependency NAME at TAG from the first reachable entry of GIT_URLS
 2 |     cmake_parse_arguments(DEPT "" "NAME;TAG" "GIT_URLS" ${ARGN})
 3 |     find_program(GIT_EXECUTABLE git)
 4 |     if(NOT GIT_EXECUTABLE)
 5 |         message(FATAL_ERROR "git not found!")
 6 |     endif()
 7 |     foreach(GIT_URL IN LISTS DEPT_GIT_URLS) # probe the URLs in the order given
 8 |         execute_process(
 9 |             COMMAND ${GIT_EXECUTABLE} ls-remote --heads "${GIT_URL}"
10 |             RESULT_VARIABLE GIT_RESULT
11 |             OUTPUT_QUIET
12 |             ERROR_QUIET
13 |             TIMEOUT 15
14 |         )
15 |         if(GIT_RESULT EQUAL 0) # ls-remote succeeded: use this URL and stop probing
16 |             set(VALID_GIT_URL ${GIT_URL})
17 |             break()
18 |         endif()
19 |     endforeach()
20 |     if(NOT VALID_GIT_URL)
21 |         message(FATAL_ERROR "all urls for ${DEPT_NAME} are not reachable!")
22 |     endif()
23 |     message(STATUS "Fetching ${DEPT_NAME}(${DEPT_TAG}) from ${VALID_GIT_URL}")
24 |     FetchContent_Declare(${DEPT_NAME} GIT_REPOSITORY ${VALID_GIT_URL} GIT_TAG ${DEPT_TAG} GIT_SHALLOW TRUE)
25 |     string(TOUPPER ${DEPT_NAME} NAME_UPPER)
26 |     set(${NAME_UPPER}_INSTALL OFF CACHE INTERNAL "" FORCE) # presumably options read by the fetched project — verify per dependency
27 |     set(${NAME_UPPER}_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
28 |     set(${NAME_UPPER}_BUILD_EXAMPLES OFF CACHE INTERNAL "" FORCE)
29 |     FetchContent_MakeAvailable(${DEPT_NAME})
30 | endfunction()
31 | 
32 | if(DOWNLOAD_DEPENDENCE) # fetch fmt, spdlog and pybind11 over git; otherwise use the subdirectories below
33 |     include(FetchContent)
34 |     EnableDept(
35 |         NAME fmt
36 |         TAG 11.2.0
37 |         GIT_URLS
38 |             https://github.com/fmtlib/fmt.git
39 |             https://gitcode.com/GitHub_Trending/fm/fmt.git
40 |     )
41 |     EnableDept(
42 |         NAME spdlog
43 |         TAG v1.15.3
44 |         GIT_URLS
45 |             https://github.com/gabime/spdlog.git
46 |             https://gitcode.com/GitHub_Trending/sp/spdlog.git
47 |     )
48 |     EnableDept(
49 |         NAME pybind11
50 |         TAG v3.0.1
51 |         GIT_URLS
52 |             https://github.com/pybind/pybind11.git
53 |             https://gitcode.com/GitHub_Trending/py/pybind11.git
54 |     )
55 | else() # expects fmt/, spdlog/ and pybind11/ to exist next to this CMakeLists.txt
56 |     add_subdirectory(fmt)
57 |     add_subdirectory(spdlog)
58 |     add_subdirectory(pybind11)
59 | endif()
60 | 


--------------------------------------------------------------------------------
/ucm/sparse/gsa/offload_ops/include/k_repre.h:
--------------------------------------------------------------------------------
 1 | #ifndef K_REPRE_H
 2 | #define K_REPRE_H
 3 | 
 4 | #include <cstdint>
 5 | #include <cstddef>
 6 | #include <string>
 7 | #include <vector>
 8 | #include "vec_product.h"
 9 | 
10 | namespace KRepre {
11 | 
12 | using VecProductClass = VecProduct::VecProduct;
13 | 
14 | /**
15 | * @brief Key representation computer
16 | *
17 | * Computes key representations based on vector means; supports multi-threaded parallel computation and SIMD optimization
18 | */
19 | class KRepreComputer {
20 | public:
21 |     KRepreComputer() = default;
22 |     
23 |     /**
24 |     * @brief Copy construction and copy assignment are disabled
25 |     */
26 |     KRepreComputer(const KRepreComputer&) = delete;
27 |     KRepreComputer& operator=(const KRepreComputer&) = delete;
28 | 
29 |     /**
30 |     * @brief Compute the K representation of a single block
31 |     * 
32 |     * @param kArray pointer to the K vectors, laid out as [kHead, blockSize, headSize]
33 |     * @param kHead number of K heads
34 |     * @param blockSize number of K vectors in the block
35 |     * @param headSize vector dimension
36 |     * @param kRepreBlock representation of the single block [kHead, headSize]
37 |     */
38 |     void ComputeKRepreBlock(const float* __restrict kArray,
39 |                             uint32_t kHead,
40 |                             uint32_t blockSize,
41 |                             uint32_t headSize,
42 |                             float* __restrict kRepreBlock) const;
43 |     
44 |     /**
45 |     * @brief Compute the K representations of multiple blocks (parallelized with OpenMP)
46 |     * 
47 |     * @param kArray array of K-vector pointers, one per block, each [kHead, blockSize, headSize]
48 |     * @param numBlock number of blocks
49 |     * @param kHead number of K heads
50 |     * @param blockSize number of K vectors in each block
51 |     * @param headSize vector dimension
52 |     * @param kRepreBlockArray full K representation [numBlock, kHead, x, headSize]
53 |     */
54 |     void ComputeKRepre(const std::vector<float*>& kArray,
55 |                        uint32_t numBlock,
56 |                        uint32_t kHead,
57 |                        uint32_t blockSize,
58 |                        uint32_t headSize,
59 |                        const std::vector<float*>& kRepreBlockArray) const;
60 | 
61 | private:
62 |     // Manages the thread-local VecProduct instance
63 |     class ThreadLocalVecProduct {
64 |     public:
65 |         static const VecProductClass& GetInstance();
66 |     private:
67 |         ThreadLocalVecProduct() = default;
68 |     };
69 | };
70 | 
71 | }
72 | 
73 | #endif
74 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/buffer.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_BUFFER_H
25 | #define UNIFIEDCACHE_TRANS_BUFFER_H
26 | 
27 | #include <memory>
28 | #include "status/status.h"
29 | 
30 | namespace UC::Trans {
31 | 
32 | class Buffer { // Abstract interface: every device/host buffer method below is pure virtual
33 | public:
34 |     virtual ~Buffer() = default;
35 | 
36 |     virtual std::shared_ptr<void> MakeDeviceBuffer(size_t size) = 0; // returns shared ownership of an untyped buffer
37 |     virtual Status MakeDeviceBuffers(size_t size, size_t number) = 0; // NOTE(review): presumably pre-creates `number` buffers of `size` — confirm in a backend
38 |     virtual std::shared_ptr<void> GetDeviceBuffer(size_t size) = 0;
39 | 
40 |     virtual std::shared_ptr<void> MakeHostBuffer(size_t size) = 0; // host-side counterparts of the three methods above
41 |     virtual Status MakeHostBuffers(size_t size, size_t number) = 0;
42 |     virtual std::shared_ptr<void> GetHostBuffer(size_t size) = 0;
43 | 
44 |     static Status RegisterHostBuffer(void* host, size_t size, void** pDevice = nullptr); // pDevice is optional (defaults to nullptr)
45 |     static void UnregisterHostBuffer(void* host);
46 | };
47 | 
48 | } // namespace UC::Trans
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/ucm/sparse/esa/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if(BUILD_NUMA) # optionally download, configure and install numactl while CMake configures this project
 2 |     message(STATUS "Building numactl library...")
 3 | 
 4 |     set(NUMA_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/numa_install) # install prefix inside the build tree
 5 |     FetchContent_Declare(
 6 |         numactl
 7 |         URL https://github.com/numactl/numactl/releases/download/v2.0.16/numactl-2.0.16.tar.gz
 8 |         TLS_VERIFY OFF # NOTE(review): certificate checks are off and no URL_HASH is given, so the archive is unauthenticated — consider adding URL_HASH
 9 |     )
10 |     FetchContent_MakeAvailable(numactl)
11 |     if(NOT EXISTS "${NUMA_INSTALL_DIR}/lib/libnuma.so") # only build when libnuma.so is not already installed
12 |         message(STATUS "Configuring numactl...")
13 |         execute_process(
14 |             COMMAND ./configure --prefix=${NUMA_INSTALL_DIR}
15 |             WORKING_DIRECTORY ${numactl_SOURCE_DIR}
16 |             RESULT_VARIABLE numa_configure_result
17 |             OUTPUT_VARIABLE numa_configure_output
18 |             ERROR_VARIABLE numa_configure_error
19 |         )
20 |         if(NOT numa_configure_result EQUAL 0)
21 |             message(FATAL_ERROR "Failed to configure numactl. \n"
22 |                                 "Result: ${numa_configure_result}\n"
23 |                                 "STDOUT: ${numa_configure_output}\n"
24 |                                 "STDERR: ${numa_configure_error}\n")
25 |         endif()
26 | 
27 |         message(STATUS "Building and installing numactl...")
28 |         execute_process(
29 |             COMMAND make install -j8
30 |             WORKING_DIRECTORY ${numactl_SOURCE_DIR}
31 |             RESULT_VARIABLE numa_install_result
32 |             OUTPUT_VARIABLE numa_install_output
33 |             ERROR_VARIABLE numa_install_error
34 |         )
35 |         if(NOT numa_install_result EQUAL 0)
36 |             message(FATAL_ERROR "Failed to build and install numactl. \n"
37 |                                 "Result: ${numa_install_result}\n"
38 |                                 "STDOUT: ${numa_install_output}\n"
39 |                                 "STDERR: ${numa_install_error}\n")
40 |         endif()
41 |     else()
42 |         message(STATUS "Found already built libnuma. Skipping build.")
43 |     endif()
44 | 
45 |     add_definitions(-DNUMA_ENABLED) # defines the NUMA_ENABLED preprocessor macro for the targets added below
46 | else()
47 |     message(STATUS "Skipping numactl build...")
48 | endif()
49 | 
50 | add_subdirectory(retrieval)
51 | 


--------------------------------------------------------------------------------
/ucm/sandbox/sparse/retake/docs/prepare_mlvu.md:
--------------------------------------------------------------------------------
 1 | ## Prepare MLVU Dataset
 2 | 
 3 | 
 4 | ### Step 1: Download MLVU dataset from [huggingface](https://huggingface.co/datasets/MLVU/MVLU)
 5 | ```bash
 6 | git clone https://huggingface.co/datasets/MLVU/MVLU
 7 | git clone https://huggingface.co/datasets/MLVU/MLVU_Test
 8 | ```
 9 | 
10 | Denote the root directory of the downloaded MLVU dataset as `mlvu_root`; it should have the following structure:
11 | ```
12 | ${mlvu_root}/
13 | ├── MLVU/
14 |     ├── json
15 |         ...
16 |     ├── video
17 |         ...
18 | ├── figs/
19 | ```
20 | 
21 | Denote the root directory of the downloaded MLVU-Test dataset as `mlvu_test_root`; it should have the following structure:
22 | ```
23 | ${mlvu_test_root}/
24 | ├── MLVU_Test/
25 |     ├── test_question.json
26 |     ├── test_video.tar.gz.part-aa
27 |     ├── test_video.tar.gz.part-ab
28 |     ...
29 | ├── figs/
30 | ├── test_generation_tasks.json
31 | ├── test_multi_choice_tasks.json
32 | ```
33 | 
34 | Unzip MLVU-Test videos:
35 | ```bash
36 | cd MLVU_Test
37 | cat test_video.tar.gz.part-* | tar -xzvf -
38 | ```
39 | 
40 | 
41 | ### Step 2: Extract frames of all videos
42 | ```bash
43 | cd ${retake_root}
44 | python scripts/utils/frame_extraction.py \
45 | --videofile_tpl ${mlvu_root}/MLVU/video/'*/*.mp4' \
46 | --results_dir ${mlvu_root}/MLVU/video_25fps \
47 | --fps 25 \
48 | --num_workers 32
49 | python scripts/utils/frame_extraction.py \
50 | --videofile_tpl ${mlvu_test_root}/MLVU_Test/video/'*/*.mp4' \
51 | --results_dir ${mlvu_test_root}/MLVU_Test/video_25fps \
52 | --fps 25 \
53 | --num_workers 32
54 | ```
55 | 
56 | 
57 | ### Step 3: Build MLVU dataset
58 | ```bash
59 | cd ${retake_root}
60 | python scripts/utils/build_mlvu_dataset.py --hf_root ${mlvu_root}
61 | python scripts/utils/build_mlvu_test_dataset.py --hf_root ${mlvu_test_root}
62 | ```
63 | Note that you must NOT modify the folders `${mlvu_root}/MLVU/video_25fps` and `${mlvu_test_root}/MLVU_Test/video_25fps` after this step, since the absolute paths of the extracted frames are written into the annotation files `mlvu.json` and `mlvu_test.json`:
64 | ```
65 | retake_root/
66 | ├── dataset/
67 |     ├── mlvu/
68 |         ├── mlvu.json
69 |         ├── mlvu_test.json
70 | ├── ...
71 | ```


--------------------------------------------------------------------------------
/ucm/sparse/kvcomp/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if(BUILD_NUMA) # optionally download, configure and install numactl while CMake configures this project
 2 |     message(STATUS "Building numactl library...")
 3 | 
 4 |     set(NUMA_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/numa_install) # install prefix inside the build tree
 5 |     FetchContent_Declare(
 6 |         numactl
 7 |         URL https://github.com/numactl/numactl/releases/download/v2.0.16/numactl-2.0.16.tar.gz
 8 |         TLS_VERIFY OFF # NOTE(review): certificate checks are off and no URL_HASH is given, so the archive is unauthenticated — consider adding URL_HASH
 9 |     )
10 |     FetchContent_MakeAvailable(numactl)
11 |     if(NOT EXISTS "${NUMA_INSTALL_DIR}/lib/libnuma.so") # only build when libnuma.so is not already installed
12 |         message(STATUS "Configuring numactl...")
13 |         execute_process(
14 |             COMMAND ./configure --prefix=${NUMA_INSTALL_DIR}
15 |             WORKING_DIRECTORY ${numactl_SOURCE_DIR}
16 |             RESULT_VARIABLE numa_configure_result
17 |             OUTPUT_VARIABLE numa_configure_output
18 |             ERROR_VARIABLE numa_configure_error
19 |         )
20 |         if(NOT numa_configure_result EQUAL 0)
21 |             message(FATAL_ERROR "Failed to configure numactl. \n"
22 |                                 "Result: ${numa_configure_result}\n"
23 |                                 "STDOUT: ${numa_configure_output}\n"
24 |                                 "STDERR: ${numa_configure_error}\n")
25 |         endif()
26 | 
27 |         message(STATUS "Building and installing numactl...")
28 |         execute_process(
29 |             COMMAND make install -j8
30 |             WORKING_DIRECTORY ${numactl_SOURCE_DIR}
31 |             RESULT_VARIABLE numa_install_result
32 |             OUTPUT_VARIABLE numa_install_output
33 |             ERROR_VARIABLE numa_install_error
34 |         )
35 |         if(NOT numa_install_result EQUAL 0)
36 |             message(FATAL_ERROR "Failed to build and install numactl. \n"
37 |                                 "Result: ${numa_install_result}\n"
38 |                                 "STDOUT: ${numa_install_output}\n"
39 |                                 "STDERR: ${numa_install_error}\n")
40 |         endif()
41 |     else()
42 |         message(STATUS "Found already built libnuma. Skipping build.")
43 |     endif()
44 | 
45 |     add_definitions(-DNUMA_ENABLED) # defines the NUMA_ENABLED preprocessor macro for the targets added below
46 | else()
47 |     message(STATUS "Skipping numactl build...")
48 | endif()
49 | 
50 | add_subdirectory(hash_retrieval)
51 | 


--------------------------------------------------------------------------------
/ucm/logger.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # MIT License
 3 | #
 4 | # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | # SOFTWARE.
23 | #
24 | 
25 | import logging
26 | import os
27 | 
28 | 
29 | def init_logger(name: str = "UNIFIED_CACHE") -> logging.Logger:  # return the logger called `name`, configured as below
30 |     log_level = os.getenv("UNIFIED_CACHE_LOG_LEVEL", "INFO").upper()  # level comes from the environment; defaults to INFO
31 | 
32 |     logger = logging.getLogger(name)
33 |     logger.setLevel(log_level)  # applied on every call, even when the handler already exists
34 | 
35 |     if not logger.handlers:  # attach the stream handler once so repeated calls do not duplicate output
36 |         handler = logging.StreamHandler()
37 |         formatter = logging.Formatter(
38 |             "[%(asctime)s] - %(name)s - %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
39 |             datefmt="%Y-%m-%d %H:%M:%S",
40 |         )
41 | 
42 |         handler.setFormatter(formatter)
43 |         logger.addHandler(handler)
44 | 
45 |     return logger
46 | 
47 | 
if __name__ == "__main__":
    # Manual smoke test: force DEBUG so that every level below is emitted.
    os.environ["UNIFIED_CACHE_LOG_LEVEL"] = "DEBUG"
    demo_logger = init_logger()
    demo_logger.debug("debug message")
    demo_logger.info("info message")
    demo_logger.warning("warning message")
    demo_logger.error("error message")
55 | 


--------------------------------------------------------------------------------
/ucm/store/ucmstore.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_STORE_H
25 | #define UNIFIEDCACHE_STORE_H
26 | 
27 | #include "task/task_shard.h"
28 | 
29 | namespace UC {
30 | 
31 | template <class T = Task>
32 | class CCStore {
33 |     using BlockId = std::string;
34 |     using TaskId = size_t;
35 | 
36 | public:
37 |     virtual ~CCStore() = default;
38 |     virtual int32_t Alloc(const BlockId& block) = 0;
39 |     virtual bool Lookup(const BlockId& block) = 0;
40 |     virtual void Commit(const BlockId& block, const bool success) = 0;
41 |     virtual std::list<int32_t> Alloc(const std::list<BlockId>& blocks) = 0;
42 |     virtual std::list<bool> Lookup(const std::list<BlockId>& blocks) = 0;
43 |     virtual void Commit(const std::list<BlockId>& blocks, const bool success) = 0;
44 |     virtual TaskId Submit(T&& task) = 0;
45 |     virtual int32_t Wait(const TaskId task) = 0;
46 |     virtual int32_t Check(const TaskId task, bool& finish) = 0;
47 | };
48 | 
49 | } // namespace UC
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/ucm/shared/metrics/cpy/metrics.py.cc:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #include <pybind11/pybind11.h>
25 | #include <pybind11/stl.h>
26 | #include "stats_monitor.h"
27 | 
28 | namespace py = pybind11;
29 | namespace UC::Metrics {
30 | 
31 | void bind_monitor(py::module_& m)
32 | {
33 |     py::class_<StatsMonitor>(m, "StatsMonitor")
34 |         .def_static("get_instance", &StatsMonitor::GetInstance, py::return_value_policy::reference)
35 |         .def("update_stats", &StatsMonitor::UpdateStats)
36 |         .def("reset_all", &StatsMonitor::ResetAllStats)
37 |         .def("get_stats", &StatsMonitor::GetStats)
38 |         .def("get_stats_and_clear", &StatsMonitor::GetStatsAndClear);
39 | }
40 | 
41 | } // namespace UC::Metrics
42 | 
43 | PYBIND11_MODULE(ucmmonitor, module)
44 | {
45 |     module.attr("project") = UCM_PROJECT_NAME;
46 |     module.attr("version") = UCM_PROJECT_VERSION;
47 |     module.attr("commit_id") = UCM_COMMIT_ID;
48 |     module.attr("build_type") = UCM_BUILD_TYPE;
49 |     UC::Metrics::bind_monitor(module);
50 | }


--------------------------------------------------------------------------------
/ucm/shared/metrics/cc/stats_registry.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_REGISTRY_H
25 | #define UNIFIEDCACHE_REGISTRY_H
26 | 
27 | #include <functional>
28 | #include <mutex>
29 | #include <unordered_map>
30 | #include "stats/istats.h"
31 | 
32 | namespace UC::Metrics {
33 | 
34 | using Creator = std::unique_ptr<IStats> (*)();
35 | 
36 | class StatsRegistry {
37 | public:
38 |     static StatsRegistry& GetInstance();
39 | 
40 |     static void RegisterStats(std::string name, Creator creator);
41 | 
42 |     std::unique_ptr<IStats> CreateStats(const std::string& name);
43 | 
44 |     std::vector<std::string> GetRegisteredStatsNames();
45 | 
46 | private:
47 |     StatsRegistry() = default;
48 |     ~StatsRegistry() = default;
49 |     StatsRegistry(const StatsRegistry&) = delete;
50 |     StatsRegistry& operator=(const StatsRegistry&) = delete;
51 | 
52 |     std::mutex mutex_;
53 |     std::unordered_map<std::string, Creator> registry_;
54 | };
55 | 
56 | } // namespace UC::Metrics
57 | 
58 | #endif // UNIFIEDCACHE_REGISTRY_H


--------------------------------------------------------------------------------
/ucm/shared/trans/simu/simu_device.cc:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #include <fmt/format.h>
25 | #include "simu_buffer.h"
26 | #include "simu_stream.h"
27 | #include "trans/device.h"
28 | 
29 | namespace UC::Trans {
30 | 
31 | Status Device::Setup(int32_t deviceId)
32 | {
33 |     if (deviceId < 0) { return Status::Error(fmt::format("invalid device id({})", deviceId)); }
34 |     return Status::OK();
35 | }
36 | 
37 | std::unique_ptr<Stream> Device::MakeStream()
38 | {
39 |     std::unique_ptr<Stream> stream = nullptr;
40 |     try {
41 |         stream = std::make_unique<SimuStream>();
42 |     } catch (...) {
43 |         return nullptr;
44 |     }
45 |     if (stream->Setup().Success()) { return stream; }
46 |     return nullptr;
47 | }
48 | 
49 | std::unique_ptr<Stream> Device::MakeSMStream() { return MakeStream(); }
50 | 
51 | std::unique_ptr<Buffer> Device::MakeBuffer()
52 | {
53 |     try {
54 |         return std::make_unique<SimuBuffer>();
55 |     } catch (...) {
56 |         return nullptr;
57 |     }
58 | }
59 | 
60 | } // namespace UC::Trans
61 | 


--------------------------------------------------------------------------------
/ucm/sparse/factory.py:
--------------------------------------------------------------------------------
 1 | import importlib
 2 | from typing import Callable
 3 | 
 4 | from vllm.config import VllmConfig
 5 | 
 6 | from ucm.logger import init_logger
 7 | from ucm.sparse.base import UcmSparseBase, UcmSparseRole
 8 | from ucm.utils import Config
 9 | 
10 | logger = init_logger(__name__)
11 | 
12 | 
class UcmSparseFactory:
    """Registry-backed factory for UCM sparse attention methods.

    Each method is registered under a name together with the module path and
    class name that implement it. The module is imported only when the method
    is actually requested.
    """

    # Maps a method name to a zero-argument loader returning the implementing class.
    _registry: dict[str, Callable[[], type[UcmSparseBase]]] = {}

    @classmethod
    def register_sparse_method(
        cls, name: str, module_path: str, class_name: str
    ) -> None:
        """Register a sparse attention method with a lazy-loading module and class name.

        Args:
            name: Key under which the method is looked up.
            module_path: Dotted path of the module to import on first use.
            class_name: Attribute of that module holding the implementing class.

        Raises:
            ValueError: If ``name`` has already been registered.
        """
        if name in cls._registry:
            raise ValueError(f"Sparse attention method '{name}' is already registered.")

        def loader() -> type[UcmSparseBase]:
            module = importlib.import_module(module_path)
            return getattr(module, class_name)

        cls._registry[name] = loader

    @classmethod
    def create_sparse_method(
        cls, config: "VllmConfig", role: UcmSparseRole
    ) -> UcmSparseBase:
        """Instantiate the sparse method selected by ``ucm_sparse_config``.

        The first key of the ``ucm_sparse_config`` mapping is taken as the
        method name.

        Args:
            config: vLLM configuration; its ``kv_transfer_config`` is read.
            role: Role passed through to the sparse method constructor.

        Returns:
            A new instance of the selected sparse method.

        Raises:
            ValueError: If ``ucm_sparse_config`` is missing or empty, or if it
                names a method that has not been registered.
        """
        ucm_config = Config(config.kv_transfer_config)
        ucm_cfg = ucm_config.get_config().get("ucm_sparse_config")
        if not ucm_cfg:
            # Without this guard a missing section fails with AttributeError
            # on None and an empty one leaks a bare StopIteration.
            raise ValueError(
                "'ucm_sparse_config' is missing or empty; cannot select a sparse method."
            )

        sparse_method_name, _ = next(iter(ucm_cfg.items()))
        loader = cls._registry.get(sparse_method_name)
        if loader is None:
            raise ValueError(f"Unsupported sparse method type: {sparse_method_name}")
        sparse_method_cls = loader()
        assert issubclass(sparse_method_cls, UcmSparseBase)
        logger.info("Creating sparse method with name: %s", sparse_method_name)
        return sparse_method_cls(config, role)
45 | 
46 | 
# Register available sparse methods: (registry name, module path, class name).
_BUILTIN_SPARSE_METHODS = (
    ("ESA", "ucm.sparse.esa.esa", "ESA"),
    ("KvComp", "ucm.sparse.kvcomp.kvcomp", "KvComp"),
    ("GSA", "ucm.sparse.gsa.gsa", "GSA"),
    ("KVStarMultiStep", "ucm.sparse.kvstar.multistep", "KVStarMultiStep"),
    ("Blend", "ucm.sparse.blend.blend", "Blend"),
)

for _method_name, _module_path, _class_name in _BUILTIN_SPARSE_METHODS:
    UcmSparseFactory.register_sparse_method(_method_name, _module_path, _class_name)
55 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/space/space_layout.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_SPACE_LAYOUT_H
25 | #define UNIFIEDCACHE_SPACE_LAYOUT_H
26 | 
27 | #include <memory>
28 | #include <string>
29 | #include <vector>
30 | #include "status/status.h"
31 | 
32 | namespace UC {
33 | 
34 | class SpaceLayout {
35 | public:
36 |     struct DataIterator;
37 | public:
38 |     virtual ~SpaceLayout() = default;
39 |     virtual Status Setup(const std::vector<std::string>& storageBackends) = 0;
40 |     virtual std::string DataFileParent(const std::string& blockId, bool activated) const = 0;
41 |     virtual std::string DataFilePath(const std::string& blockId, bool activated) const = 0;
42 |     virtual std::string ClusterPropertyFilePath() const = 0;
43 |     virtual std::shared_ptr<DataIterator> CreateFilePathIterator() const = 0;
44 |     virtual std::string NextDataFilePath(std::shared_ptr<DataIterator> iter) const = 0;
45 |     virtual bool IsActivatedFile(const std::string& filePath) const = 0;
46 | };
47 | 
48 | } // namespace UC
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/.github/workflows/ucmstore.yml:
--------------------------------------------------------------------------------
# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage.
# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml
#
# Builds the project with the `simu` runtime environment and runs its unit tests through ctest.
name: ucmstore

# Runs on pushes to branches matching "*" and on pull requests that target the listed branch patterns.
on:
  push:
    branches: [ "*" ]
  pull_request:
    branches: [ "dev*", "main", "*release", "feature*" ]

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Debug

jobs:
  cc_gtest:
    # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
    # You can convert this to a matrix build if you need cross-platform coverage.
    # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4

    # googletest v1.17.0 is cloned, built from source with -fPIC and C++17, and installed system-wide.
    - name: Install googletest
      run: |
        git clone https://github.com/google/googletest.git --depth=1 --branch=v1.17.0
        cd googletest
        mkdir build && cd build
        cmake -DCMAKE_CXX_FLAGS="-fPIC" -DCMAKE_C_FLAGS="-fPIC" -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_STANDARD_REQUIRED=True ..
        sudo make install -j

    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
      # The sparse component is switched off, unit tests are switched on, and the runtime environment is set to simu.
      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBUILD_UCM_SPARSE=OFF -DBUILD_UNIT_TESTS=ON -DRUNTIME_ENVIRONMENT=simu

    - name: Build
      # Build your program with the given configuration
      run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} -j

    - name: Test
      working-directory: ${{github.workspace}}/build
      # Execute tests defined by the CMake configuration.
      # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
      # --output-on-failure prints the output of failing tests.
      run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure
47 | 


--------------------------------------------------------------------------------
/ucm/shared/metrics/cc/stats_registry.cc:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #include "stats_registry.h"
25 | 
26 | namespace UC::Metrics {
27 | 
28 | StatsRegistry& StatsRegistry::GetInstance()
29 | {
30 |     static StatsRegistry inst;
31 |     return inst;
32 | }
33 | 
34 | void StatsRegistry::RegisterStats(std::string name, Creator creator)
35 | {
36 |     auto& reg = GetInstance();
37 |     std::lock_guard lk(reg.mutex_);
38 |     reg.registry_[name] = creator;
39 | }
40 | 
41 | std::unique_ptr<IStats> StatsRegistry::CreateStats(const std::string& name)
42 | {
43 |     auto& reg = GetInstance();
44 |     std::lock_guard lk(reg.mutex_);
45 |     if (auto it = reg.registry_.find(name); it != reg.registry_.end()) return it->second();
46 |     return nullptr;
47 | }
48 | 
49 | std::vector<std::string> StatsRegistry::GetRegisteredStatsNames()
50 | {
51 |     auto& reg = GetInstance();
52 |     std::lock_guard lk(reg.mutex_);
53 |     std::vector<std::string> names;
54 |     names.reserve(reg.registry_.size());
55 |     for (auto& [n, _] : reg.registry_) names.push_back(n);
56 |     return names;
57 | }
58 | 
59 | } // namespace UC::Metrics


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/trans/trans_manager.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_TRANS_MANAGER_H
25 | #define UNIFIEDCACHE_TRANS_MANAGER_H
26 | 
27 | #include "posix_queue.h"
28 | #include "task/task_manager.h"
29 | 
30 | namespace UC {
31 | 
32 | class TransManager : public TaskManager {
33 | public:
34 |     Status Setup(const int32_t deviceId, const size_t streamNumber, const size_t ioSize,
35 |                  const size_t bufferNumber, const SpaceLayout* layout, const size_t timeoutMs,
36 |                  bool useDirect = false)
37 |     {
38 |         this->timeoutMs_ = timeoutMs;
39 |         auto status = Status::OK();
40 |         for (size_t i = 0; i < streamNumber; i++) {
41 |             auto q = std::make_shared<PosixQueue>();
42 |             status = q->Setup(deviceId, ioSize, bufferNumber, &this->failureSet_, layout, timeoutMs,
43 |                               useDirect);
44 |             if (status.Failure()) { break; }
45 |             this->queues_.emplace_back(std::move(q));
46 |         }
47 |         return status;
48 |     }
49 | };
50 | 
51 | }  // namespace UC
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/hotness/hotness_timer.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | 
25 | #ifndef UNIFIEDCACHE_HOTNESS_TIMER_H
26 | #define UNIFIEDCACHE_HOTNESS_TIMER_H
27 | #include <chrono>
28 | #include <functional>
29 | #include "logger/logger.h"
30 | #include "template/timer.h"
31 | 
32 | namespace UC {
33 | 
34 | class HotnessTimer {
35 | public:
36 |     void SetInterval(const size_t interval) { this->interval_ = std::chrono::seconds(interval); }
37 |     Status Start(std::function<void()> callable)
38 |     {
39 |         try {
40 |             this->timer_ = std::make_unique<Timer<std::function<void()>>>(this->interval_,
41 |                                                                           std::move(callable));
42 |         } catch (const std::exception& e) {
43 |             UC_ERROR("Failed({}) to start hotness timer.", e.what());
44 |             return Status::OutOfMemory();
45 |         }
46 |         return this->timer_->Start() ? Status::OK() : Status::Error();
47 |     }
48 | 
49 | private:
50 |     std::chrono::seconds interval_;
51 |     std::unique_ptr<Timer<std::function<void()>>> timer_;
52 | };
53 | 
54 | } // namespace UC
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/ucm/store/pcstore/cc/domain/space/space_layout.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_SPACE_LAYOUT_H
25 | #define UNIFIEDCACHE_SPACE_LAYOUT_H
26 | 
27 | #include <string>
28 | #include <vector>
29 | #include "status/status.h"
30 | 
31 | namespace UC {
32 | 
33 | class SpaceLayout {
34 | public:
35 |     Status Setup(const std::vector<std::string>& storageBackends);
36 |     std::string DataFilePath(const std::string& blockId, bool activated) const;
37 |     Status Commit(const std::string& blockId, bool success) const;
38 | 
39 | private:
40 |     std::vector<std::string> RelativeRoots() const;
41 |     Status AddStorageBackend(const std::string& path);
42 |     Status AddFirstStorageBackend(const std::string& path);
43 |     Status AddSecondaryStorageBackend(const std::string& path);
44 |     std::string StorageBackend(const std::string& blockId) const;
45 |     std::string DataFileRoot() const;
46 |     std::string TempFileRoot() const;
47 |     void ShardBlockId(const std::string& blockId, uint64_t& front, uint64_t& back) const;
48 | 
49 | private:
50 |     std::vector<std::string> storageBackends_;
51 | };
52 | 
53 | } // namespace UC
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/ucm/shared/trans/ascend/ascend_device.cc:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #include <acl/acl.h>
25 | #include "ascend_buffer.h"
26 | #include "ascend_stream.h"
27 | #include "trans/device.h"
28 | 
29 | namespace UC::Trans {
30 | 
31 | Status Device::Setup(int32_t deviceId)
32 | {
33 |     if (deviceId < 0) { return Status::Error(fmt::format("invalid device id({})", deviceId)); }
34 |     auto ret = aclrtSetDevice(deviceId);
35 |     if (ret == ACL_SUCCESS) { return Status::OK(); }
36 |     return Status{ret, std::to_string(ret)};
37 | }
38 | 
39 | std::unique_ptr<Stream> Device::MakeStream()
40 | {
41 |     std::unique_ptr<Stream> stream = nullptr;
42 |     try {
43 |         stream = std::make_unique<AscendStream>();
44 |     } catch (...) {
45 |         return nullptr;
46 |     }
47 |     if (stream->Setup().Success()) { return stream; }
48 |     return nullptr;
49 | }
50 | 
51 | std::unique_ptr<Stream> Device::MakeSMStream() { return nullptr; }
52 | 
53 | std::unique_ptr<Buffer> Device::MakeBuffer()
54 | {
55 |     try {
56 |         return std::make_unique<AscendBuffer>();
57 |     } catch (...) {
58 |         return nullptr;
59 |     }
60 | }
61 | 
62 | } // namespace UC::Trans
63 | 


--------------------------------------------------------------------------------
/ucm/store/nfsstore/cc/domain/space/space_recycle.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * MIT License
 3 |  *
 4 |  * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
 5 |  *
 6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 7 |  * of this software and associated documentation files (the "Software"), to deal
 8 |  * in the Software without restriction, including without limitation the rights
 9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  * */
24 | #ifndef UNIFIEDCACHE_SPACE_RECYCLE_H
25 | #define UNIFIEDCACHE_SPACE_RECYCLE_H
26 | 
27 | #include <thread>
28 | #include <mutex>
29 | #include <condition_variable>
30 | #include <atomic>
31 | #include <functional>
32 | #include "space_layout.h"
33 | 
34 | namespace UC {
35 | 
36 | class SpaceRecycle {
37 | public:
38 |     using RecycleOneBlockDone = std::function<void(void)>;
39 |     SpaceRecycle() = default;
40 |     SpaceRecycle(const SpaceRecycle&) = delete;
41 |     SpaceRecycle& operator=(const SpaceRecycle&) = delete;
42 |     ~SpaceRecycle();
43 |     Status Setup(const SpaceLayout* layout, const size_t totalNumber,
44 |                  RecycleOneBlockDone done);
45 |     void Trigger();
46 | private:
47 |     void Recycler();
48 | private:
49 |     bool stop_{false};
50 |     bool recycling_{false};
51 |     std::atomic_bool serviceRunning_{false};
52 |     uint32_t recycleNum_{0};
53 |     RecycleOneBlockDone recycleOneBlockDone_;
54 |     const SpaceLayout* layout_{nullptr};
55 |     std::mutex mtx_;
56 |     std::condition_variable cv_;
57 |     std::thread worker_;
58 | };
59 | 
60 | } // namespace UC
61 | #endif


--------------------------------------------------------------------------------