├── .clang-format ├── .clang-tidy ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── master_config ├── proxy_config ├── readme.md ├── src ├── CMakeLists.txt ├── cluster │ ├── CMakeLists.txt │ ├── in_memory_log_store.cpp │ ├── log_state_machine.cpp │ ├── raft_logger.cpp │ └── raft_stuff.cpp ├── common │ ├── CMakeLists.txt │ ├── master_cfg.cpp │ ├── proxy_cfg.cpp │ ├── vector_cfg.cpp │ └── vector_init.cpp ├── database │ ├── CMakeLists.txt │ ├── persistence.cpp │ ├── scalar_storage.cpp │ └── vector_database.cpp ├── httpserver │ ├── CMakeLists.txt │ ├── admin_service_impl.cpp │ ├── base_service_impl.cpp │ ├── http_server.cpp │ ├── master_server.cpp │ ├── master_service_impl.cpp │ ├── proxy_server.cpp │ ├── proxy_service_impl.cpp │ └── user_service_impl.cpp ├── include │ ├── cluster │ │ ├── in_memory_log_store.h │ │ ├── in_memory_state_mgr.h │ │ ├── log_state_machine.h │ │ ├── raft_logger.h │ │ ├── raft_logger_wrapper.h │ │ └── raft_stuff.h │ ├── common │ │ ├── backtrace.h │ │ ├── constants.h │ │ ├── master_cfg.h │ │ ├── proxy_cfg.h │ │ ├── vector_cfg.h │ │ ├── vector_init.h │ │ └── vector_utils.h │ ├── database │ │ ├── persistence.h │ │ ├── scalar_storage.h │ │ └── vector_database.h │ ├── httpserver │ │ ├── admin_service_impl.h │ │ ├── base_service_impl.h │ │ ├── http_server.h │ │ ├── master_server.h │ │ ├── master_service_impl.h │ │ ├── proxy_server.h │ │ ├── proxy_service_impl.h │ │ └── user_service_impl.h │ ├── index │ │ ├── faiss_index.h │ │ ├── filter_index.h │ │ ├── hnswlib_index.h │ │ └── index_factory.h │ └── logger │ │ └── logger.h ├── index │ ├── CMakeLists.txt │ ├── faiss_index.cpp │ ├── filter_index.cpp │ ├── hnswlib_index.cpp │ └── index_factory.cpp └── logger │ ├── CMakeLists.txt │ └── logger.cpp ├── test ├── CMakeLists.txt ├── cfg │ └── cfg_test.cpp ├── database │ ├── database_test.cpp │ └── scalar_storage_test.cpp ├── etcd │ └── etcd_test.cpp ├── index │ ├── faiss_index_test.cpp │ ├── filter_index_test.cpp │ └── hnsw_index_test.cpp ├── test.sh └── test │ 
└── gtest_test.cpp ├── third_party ├── CMakeLists.txt ├── build.sh ├── patches │ ├── brpc-1.11.0.patch │ └── faiss-1.9.0.patch └── proto │ ├── CMakeLists.txt │ └── http.proto ├── tools ├── CMakeLists.txt ├── backtrace.cpp └── server │ ├── CMakeLists.txt │ ├── vdb_server.cpp │ ├── vdb_server_master.cpp │ └── vdb_server_proxy.cpp └── vectordb_config /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | DerivePointerAlignment: false 3 | PointerAlignment: Right 4 | ColumnLimit: 120 5 | 6 | # Default for clang-8, changed in later clangs. Set explicitly for forwards 7 | # compatibility for students with modern clangs 8 | IncludeBlocks: Preserve 9 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # 18 | # Modified from the Apache Arrow project for the Terrier project. 
19 | # 20 | --- 21 | Checks: ' 22 | bugprone-*, 23 | clang-analyzer-*, 24 | google-*, 25 | modernize-*, 26 | performance-*, 27 | portability-*, 28 | readability-*, 29 | -bugprone-easily-swappable-parameters, 30 | -bugprone-implicit-widening-of-multiplication-result, 31 | -bugprone-narrowing-conversions, 32 | -bugprone-reserved-identifier, 33 | -bugprone-signed-char-misuse, 34 | -bugprone-suspicious-include, 35 | -bugprone-unhandled-self-assignment, 36 | -clang-analyzer-cplusplus.NewDelete, 37 | -clang-analyzer-cplusplus.NewDeleteLeaks, 38 | -clang-analyzer-security.insecureAPI.rand, 39 | -clang-diagnostic-implicit-int-float-conversion, 40 | -google-readability-avoid-underscore-in-googletest-name, 41 | -modernize-avoid-c-arrays, 42 | -modernize-use-nodiscard, 43 | -readability-convert-member-functions-to-static, 44 | -readability-identifier-length, 45 | -readability-function-cognitive-complexity, 46 | -readability-magic-numbers, 47 | -readability-make-member-function-const, 48 | -readability-qualified-auto, 49 | -readability-redundant-access-specifiers, 50 | -bugprone-exception-escape, 51 | -Wambiguous-reversed-operator, 52 | -google-default-arguments, 53 | -modernize-avoid-bind, 54 | ' 55 | CheckOptions: 56 | - { key: readability-identifier-naming.ClassCase, value: CamelCase } 57 | - { key: readability-identifier-naming.EnumCase, value: CamelCase } 58 | - { key: readability-identifier-naming.FunctionCase, value: CamelCase } 59 | - { key: readability-identifier-naming.GlobalConstantCase, value: UPPER_CASE } 60 | - { key: readability-identifier-naming.MemberCase, value: lower_case } 61 | - { key: readability-identifier-naming.MemberSuffix, value: _ } 62 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case } 63 | - { key: readability-identifier-naming.StructCase, value: CamelCase } 64 | - { key: readability-identifier-naming.UnionCase, value: CamelCase } 65 | - { key: readability-identifier-naming.VariableCase, value: lower_case } 66 | 
WarningsAsErrors: '*' 67 | HeaderFilterRegex: '/(src|test)/include' 68 | AnalyzeTemporaryDtors: true 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #==============================================================================# 2 | # This file specifies intentionally untracked files that git should ignore. 3 | #==============================================================================# 4 | third_party/installed 5 | third_party/src 6 | third_party/proto/*.h 7 | third_party/proto/*.cc 8 | third_party/NuRaft* 9 | third_party/etcd* 10 | #==============================================================================# 11 | # File extensions to be ignored anywhere in the tree. 12 | #==============================================================================# 13 | # Temp files created by most text editors. 14 | *~ 15 | # Merge files created by git. 16 | *.orig 17 | # Java bytecode 18 | *.class 19 | # Byte compiled python modules. 20 | *.pyc 21 | # vim swap files 22 | .*.sw? 23 | .sw? 24 | # vscode settings directory 25 | .vscode 26 | #OS X specific files. 27 | .DS_store 28 | # Core files 29 | #core 30 | 31 | #==============================================================================# 32 | # Explicit files to ignore (only matches one). 
33 | #==============================================================================# 34 | # Various tag programs 35 | /tags 36 | /TAGS 37 | /GPATH 38 | /GRTAGS 39 | /GSYMS 40 | /GTAGS 41 | .gitusers 42 | autom4te.cache 43 | cscope.files 44 | cscope.out 45 | autoconf/aclocal.m4 46 | autoconf/autom4te.cache 47 | /compile_commands.json 48 | 49 | #==============================================================================# 50 | # Build artifacts 51 | #==============================================================================# 52 | #m4/ 53 | build/ 54 | #*.m4 55 | *.o 56 | *.lo 57 | *.la 58 | *~ 59 | *.pdf 60 | *.swp 61 | a.out 62 | 63 | #==============================================================================# 64 | # Kate Swap Files 65 | #==============================================================================# 66 | *.kate-swp 67 | .#kate-* 68 | 69 | #==============================================================================# 70 | # Backup artifacts 71 | #==============================================================================# 72 | ~* 73 | *~ 74 | tmp/ 75 | 76 | #==============================================================================# 77 | # KDevelop files 78 | #==============================================================================# 79 | .kdev4 80 | *.kdev4 81 | .dirstamp 82 | .deps 83 | .libs 84 | 85 | #==============================================================================# 86 | # Eclipse files 87 | #==============================================================================# 88 | .wtpmodules 89 | .classpath 90 | .project 91 | .cproject 92 | .pydevproject 93 | .settings 94 | .autotools 95 | 96 | /Debug/ 97 | /misc/ 98 | 99 | #==============================================================================# 100 | # Intellij files 101 | #==============================================================================# 102 | .idea 103 | *.iml 104 | 105 | 
#==============================================================================# 106 | # Code Coverage files 107 | #==============================================================================# 108 | *.gcno 109 | *.gcda 110 | 111 | 112 | #==============================================================================# 113 | # Eclipse 114 | #==============================================================================# 115 | 116 | .metadata 117 | bin/ 118 | tmp/ 119 | *.tmp 120 | *.bak 121 | *.swp 122 | *~.nib 123 | local.properties 124 | .settings/ 125 | .loadpath 126 | .recommenders 127 | 128 | # Eclipse Core 129 | .project 130 | 131 | # External tool builders 132 | .externalToolBuilders/ 133 | 134 | # Locally stored "Eclipse launch configurations" 135 | *.launch 136 | 137 | # PyDev specific (Python IDE for Eclipse) 138 | *.pydevproject 139 | 140 | # CDT-specific (C/C++ Development Tooling) 141 | .cproject 142 | 143 | # JDT-specific (Eclipse Java Development Tools) 144 | .classpath 145 | 146 | # Java annotation processor (APT) 147 | .factorypath 148 | 149 | # PDT-specific (PHP Development Tools) 150 | .buildpath 151 | 152 | # sbteclipse plugin 153 | .target 154 | 155 | # Tern plugin 156 | .tern-project 157 | 158 | # TeXlipse plugin 159 | .texlipse 160 | 161 | # STS (Spring Tool Suite) 162 | .springBeans 163 | 164 | # Code Recommenders 165 | .recommenders/ 166 | io_file 167 | 168 | ## General 169 | 170 | # Compiled Object files 171 | *.slo 172 | *.lo 173 | *.o 174 | *.cuo 175 | 176 | # Compiled Dynamic libraries 177 | *.so 178 | *.dylib 179 | 180 | # Compiled Static libraries 181 | *.lai 182 | *.la 183 | *.a 184 | 185 | # Compiled python 186 | *.pyc 187 | 188 | # Compiled MATLAB 189 | *.mex* 190 | 191 | # IPython notebook checkpoints 192 | .ipynb_checkpoints 193 | 194 | # Editor temporaries 195 | *.swp 196 | *~ 197 | 198 | # Sublime Text settings 199 | *.sublime-workspace 200 | *.sublime-project 201 | 202 | # Eclipse Project settings 203 | *.*project 204 | 
.settings 205 | 206 | # Visual Studio 207 | .vs 208 | 209 | # QtCreator files 210 | *.user 211 | 212 | # PyCharm files 213 | .idea 214 | 215 | # OSX dir files 216 | .DS_Store 217 | 218 | # User's build configuration 219 | Makefile.config 220 | 221 | # build, distribute, and bins (+ python proto bindings) 222 | build 223 | .build_debug/* 224 | .build_release/* 225 | distribute/* 226 | *.testbin 227 | *.bin 228 | cmake_build 229 | .cmake_build 230 | cmake-build-* 231 | *CMakeFiles/ 232 | *.cmake 233 | Makefile 234 | # Generated documentation 235 | apidoc/doc 236 | docs/_site 237 | docs/gathered 238 | _site 239 | doxygen 240 | docs/dev 241 | 242 | # Config files 243 | *.conf 244 | 245 | # Vagrant 246 | .vagrant 247 | 248 | # Clangd cache index 249 | .cache 250 | 251 | # Submission zip files 252 | *.zip 253 | *.tar* 254 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project (VectorDB) 4 | 5 | # set(CMAKE_C_FLAGS_DEBUG ON) 6 | # set(CMAKE_CXX_FLAGS_DEBUG ON) 7 | set(CMAKE_BUILD_TYPE Debug) 8 | set(CMAKE_CXX_STANDARD 20) 9 | set(CMAKE_CXX_STANDARD_REQUIRED TRUE) 10 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -g") 12 | # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g") 13 | message("CMAKE_BINARY_DIR ${CMAKE_BINARY_DIR}") 14 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 15 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 16 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 17 | 18 | 19 | 20 | set(VECTORDB_SRC_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/src/include) 21 | set(VECTORDB_THIRD_PARTY_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/third_party/installed/include) 22 | set(PROTOS_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/third_party/proto) 23 | add_compile_options(-Wno-unused-function) 24 | 
include_directories(${VECTORDB_SRC_INCLUDE_DIR} ${VECTORDB_THIRD_PARTY_INCLUDE_DIR} ${PROTOS_INCLUDE_DIR}) 25 | string(REPLACE "-Wno-unused-function" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") 26 | include_directories(BEFORE src) # This is needed for gtest. 27 | enable_testing() 28 | 29 | set(THIRD_PARTY_LIB) 30 | set(PROTO_LIB) 31 | add_subdirectory(third_party) 32 | add_subdirectory(src) 33 | add_subdirectory(test) 34 | add_subdirectory(tools) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Zhou zj 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /master_config: -------------------------------------------------------------------------------- 1 | { 2 | "MASTER_HOST" : "0.0.0.0", 3 | "MASTER_PORT" : 6060, 4 | "ETCD_ENDPOINTS" : "http://127.0.0.1:2379", 5 | 6 | "LOG":{ 7 | "LOG_NAME" : "my_log", 8 | "LOG_LEVEL" : 1 9 | } 10 | } -------------------------------------------------------------------------------- /proxy_config: -------------------------------------------------------------------------------- 1 | { 2 | "INSTANCE_ID" : 1, 3 | "PROXY_ADDRESS" : "0.0.0.0", 4 | "PROXY_PORT" : 6061, 5 | 6 | "MASTER_HOST" : "127.0.0.1", 7 | "MASTER_PORT" : 6060, 8 | 9 | "READ_PATHS" :[ "/UserService/search"], 10 | "WRITE_PATHS" :[ "/UserService/upsert"], 11 | 12 | "LOG":{ 13 | "LOG_NAME" : "my_log", 14 | "LOG_LEVEL" : 1 15 | } 16 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # vectorDB 2 | 3 | a vector database implementation based on 《从零构建向量数据库》 4 | 5 | 支持FaissIndex,HnswlibIndex; 6 | 支持标量向量混合查询; 7 | 支持数据持久化存储; 8 | 使用http请求对数据库发起访问,插入或查询vector 9 | 10 | ## How to build 11 | ### 安装通用依赖库 12 | 13 | Ubuntu: 14 | ``` shell 15 | sudo apt-get install cmake openssl libssl-dev libz-dev libcpprest-dev gfortran 16 | ``` 17 | 18 | ### Prepare environment variables 19 | 20 | ```shell 21 | echo "export VECTORDB_CODE_BASE=_______" >> ~/.bashrc #下载后的代码根路径 例如/home/zhouzj/project/vectorDB 22 | source ~/.bashrc 23 | ``` 24 | 25 | ### Build third_party 26 | 27 | Switch to the project directory: 28 | 29 | ```shell 30 | cd third_party 31 | bash build.sh 32 | ``` 33 | you can use 34 | ```shell 35 | bash build.sh --help to see more detail 36 | ``` 37 | 38 | ### Build vectorDB 39 | 40 | Switch to the project directory: 41 | 42 | ```shell 43 | $ mkdir build 44 | $ cd build 45 | $ cmake .. 
46 | $ make 47 | ``` 48 | 49 | If you want to compile the system in debug mode, pass in the following flag to cmake: 50 | Debug mode: 51 | 52 | ```shell 53 | $ cmake -DCMAKE_BUILD_TYPE=Debug .. 54 | $ make -j`nproc` 55 | ``` 56 | 57 | ## How to use 58 | 59 | ### Edit vectordb_config 60 | You can find vectordb_config under the project directory; 61 | You can modify the content of each item according to your preferences. 62 | See the example and explanation in common/vector_cfg.cpp 63 | 64 | ### Build directory 65 | According to your vectordb_config, you should make sure these paths exist; 66 | like this: 67 | ```shell 68 | mkdir ~/vectordb1/ 69 | cd ~/vectordb1 70 | mkdir snap 71 | mkdir storage 72 | 73 | mkdir ~/test_vectordb/ 74 | cd ~/test_vectordb 75 | mkdir snap 76 | mkdir storage 77 | ``` 78 | When you use vdb_server you will use ~/vectordb1; when vdb_server is restarted, the contents will be retained 79 | If you want to reset, you can remove these and create them again like this: 80 | ``` shell 81 | cd ~/vectordb1 82 | rm -rf wal snap* storage* 83 | mkdir snap 84 | mkdir storage 85 | ``` 86 | 87 | ### Build protobuf 88 | After changing third_party/proto, you should run 89 | ``` shell 90 | cd third_party 91 | bash build.sh --package protobuf 92 | ``` 93 | to rebuild .pb.h and .pb.cc 94 | 95 | ### Start Server 96 | Switch to the project directory: 97 | 98 | ```shell 99 | cd build 100 | ./bin/vdb_server 101 | ``` 102 | 103 | ### Operation 104 | You can open another terminal and input commands, following the example commands in `test/test.sh`. 105 | Remember to modify the port to keep it consistent with the one in vectordb_config 106 | 107 | ### Use Google Test 108 | You can build and run the different Google Test targets like this 109 | 110 | Switch to the project directory: 111 | ```shell 112 | $ mkdir build 113 | $ cd build 114 | $ cmake .. 
115 | $ make faiss_index_test 116 | $ cd test 117 | $ ./faiss_index_test 118 | ``` 119 | 120 | 121 | ### Build Cluster 122 | 123 | Switch to the project directory: 124 | 125 | ```shell 126 | cd build 127 | ./bin/vdb_server 1 128 | ``` 129 | 130 | 在另一终端中 131 | ```shell 132 | cd build 133 | ./bin/vdb_server 2 134 | ``` 135 | 1,2为nodeID 136 | 只要在vectordb_config中配置好node信息,即可在终端中执行```./vdb_server $nodeid```(不输入nodeid则默认为1) 137 | 138 | 139 | 选取其中一个作为主节点,将其他节点作为从节点加入集群: 140 | ```shell 141 | curl -X POST -H "Content-Type: application/json" -d '{"nodeId": 2, "endpoint": "127.0.0.1:8082"}' http://localhost:7781/AdminService/AddFollower 142 | ``` 143 | 想让哪个节点作为主节点,就向该节点port发出请求,-d后为待加入的从节点信息,一次加入一个,加入完毕后通过List来查看是否已加入集群: 144 | ```shell 145 | curl -X GET http://localhost:7781/AdminService/ListNode 146 | ``` 147 | 此外可以通过GetNode来查看当前节点状态 148 | ```shell 149 | curl -X GET http://localhost:7781/AdminService/GetNode 150 | ``` 151 | 152 | 向主节点中发起upsert请求,从节点中进行search,也可查到数据; 153 | ```shell 154 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.999], "id":6, "int_field":47,"indexType": "FLAT"}' http://localhost:7781/UserService/upsert 155 | 156 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.999], "k": 5 , "indexType": "FLAT","filter":{"fieldName":"int_field","value":47,"op":"="}}' http://localhost:7782/UserService/search 157 | ``` 158 | 159 | 目前支持流量转发、故障切换、集群分片 160 | 161 | 建立vdb_server集群后,利用vdb_server_master管理集群元数据信息,需要手动提前部署好etcd 162 | ```shell 163 | cd build 164 | ./bin/vdb_server_master 165 | ``` 166 | 可通过如下接口管理vdb_server集群信息(role 1为从节点,0为主节点) 167 | ```shell 168 | #查看node信息 169 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId" : 1,"nodeId": 1}' http://localhost:6060/MasterService/GetNodeInfo 170 | #查看instance下的所有node信息 171 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId" : 1}' http://localhost:6060/MasterService/GetInstance 172 | #增加node2信息 173 | curl -X POST -H "Content-Type: application/json" -d 
'{"instanceId": 1, "nodeId": 2, "url": "http://127.0.0.1:7782", "role": 1, "status": 0}' http://localhost:6060/MasterService/AddNode 174 | #删除node2信息 175 | curl -X DELETE -H "Content-Type: application/json" -d '{"instanceId" : 1,"nodeId": 2}' http://localhost:6060/MasterService/RemoveNode 176 | 177 | #更新分区信息: 178 | curl -X POST http://localhost:6060/MasterService/UpdatePartitionConfig -H "Content-Type: application/json" -d '{ 179 | "instanceId": 1, 180 | "partitionKey": "id", 181 | "numberOfPartitions": 2, 182 | "partitions": [ 183 | {"partitionId": 0, "nodeId": 1}, 184 | {"partitionId": 0, "nodeId": 2}, 185 | {"partitionId": 0, "nodeId": 3}, 186 | {"partitionId": 1, "nodeId": 4} 187 | ] 188 | }' 189 | 190 | #获取分区信息: 191 | 192 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId" : 1}' http://localhost:6060/MasterService/GetPartitionConfig 193 | ``` 194 | 195 | 利用vdb_server_proxy提供统一的流量入口(读写分离,写必然在主节点) 196 | ```shell 197 | cd build 198 | ./bin/vdb_server_proxy 199 | ``` 200 | 201 | 如果master中设置了partition,则请求也会根据分片进行转发(一个分片需要有一个主节点) 202 | ```shell 203 | #读请求 204 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.9], "k": 5, "indexType": "FLAT", "filter":{"fieldName":"int_field","value":49, "op":"="}}' http://localhost:6061/ProxyService/search 205 | 206 | #写请求 207 | curl -X POST -H "Content-Type: application/json" -d '{"id": 6, "vectors": [0.9], "int_field": 49, "indexType": "FLAT"}' http://localhost:6061/ProxyService/upsert 208 | 209 | #强制读主 210 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.89], "k": 5, "indexType": "FLAT", "filter":{"fieldName":"int_field","value":49 ,"op":"="},"forceMaster" : true}' http://localhost:6061/ProxyService/search 211 | ``` 212 | 213 | ## Reference 214 | 215 | ### Book 216 | 217 | * 《从零构建向量数据库》 218 | 219 | ### Third-Party Libraries 220 | 221 | * faiss 222 | * hnswlib 223 | * openblas 224 | * brpc 225 | * rapidjson 226 | * httplib 227 | * spdlog 228 | * gflags 229 | * protobuf 230 
| * glog 231 | * crypto 232 | * leveldb 233 | * ssl 234 | * z 235 | * rocksdb 236 | * snappy 237 | * lz4 238 | * bz2 239 | * roaring 240 | * gtest 241 | * backward-cpp 242 | * nuraft 243 | * curl 244 | * etcdclient 245 | 246 | ### Repository 247 | 非常感谢 Xiaoccer , third_party下的patches,build.sh,CMakeLists.txt基于 [TinyVecDB](https://github.com/Xiaoccer/TinyVecDB.git) 中third_party下的内容进行修改 248 | 249 | ## License 250 | 251 | vectorDB is licensed under the MIT License. For more details, please refer to the [LICENSE](./LICENSE) file. 252 | 253 | 254 | 255 | ## Optimization 256 | 目前已完成《从零构建向量数据库》前五章所有内容,并在此之上做如下优化: 257 | 258 | ### 代码架构 259 | 将代码结构化,添加CMakeList一键编译; 260 | 添加三方库一键编译; 261 | 262 | ### 添加vectordb_config文件 263 | 将涉及的路径、端口等用config文件进行配置 264 | 读取config文件进行配置解析,采用Cfg单例来读取解析后的配置内容 265 | 266 | ### 单元测试 267 | 添加google test并加入cmake,支持编译运行单元测试 268 | 目前添加了部分单元测试 269 | 270 | ### 堆栈跟踪美化打印 271 | 添加backward-cpp 272 | 只需在CMakeList中添加需要backward的target即可 273 | 例如add_backward(vdb_server) 274 | 275 | ### 添加代码规范检查 276 | 添加 clang-tidy,clang-format 277 | 278 | ### 添加单例抽象类 279 | 添加单例模板类,将IndexFactory改造为真正的单例模式 280 | 281 | ### 压缩WAL日志文件 282 | 采用snappy压缩算法,对WAL进行压缩及解压缩,减少WAL文件所占空间 283 | 284 | ### 优化通讯协议 285 | 利用protobuf + brpc替代httplib 286 | 287 | 288 | 289 | 290 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | add_subdirectory(index) 4 | add_subdirectory(logger) 5 | add_subdirectory(httpserver) 6 | add_subdirectory(database) 7 | add_subdirectory(common) 8 | add_subdirectory(cluster) 9 | add_library(vectorDB STATIC ${ALL_OBJECT_FILES}) 10 | 11 | set(VECTORDB_LIBS 12 | vectorDB_index 13 | vectorDB_logger 14 | vectorDB_httpserver 15 | vectorDB_database 16 | vectorDB_common 17 | vectorDB_cluster 18 | ) 19 | 20 | 21 | set(WL_START_GROUP "-Wl,--start-group") 22 | set(WL_END_GROUP "-Wl,--end-group") 23 | 
set(LINK_LIBS ${LINK_LIBS} 24 | ${WL_START_GROUP} 25 | ${THIRD_PARTY_LIB} 26 | ${WL_END_GROUP} 27 | ${PROTO_LIB} 28 | ${VECTORDB_LIBS} 29 | -fopenmp 30 | -lpthread 31 | -lstdc++fs 32 | -lzstd 33 | -lcpprest 34 | -lgfortran 35 | -ldl 36 | ) 37 | 38 | message(STATUS "Link_libs: ${LINK_LIBS}") 39 | target_link_libraries(vectorDB ${LINK_LIBS}) 40 | 41 | target_include_directories( 42 | vectorDB PUBLIC $ 43 | $) 44 | -------------------------------------------------------------------------------- /src/cluster/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( 2 | vectorDB_cluster 3 | OBJECT 4 | in_memory_log_store.cpp 5 | log_state_machine.cpp 6 | raft_logger.cpp 7 | raft_stuff.cpp 8 | ) 9 | 10 | set(ALL_OBJECT_FILES 11 | ${ALL_OBJECT_FILES} $ 12 | PARENT_SCOPE) -------------------------------------------------------------------------------- /src/cluster/in_memory_log_store.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | Copyright 2017-2019 eBay Inc. 3 | Author/Developer(s): Jung-Sang Ahn 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | **************************************************************************/ 17 | 18 | #include "cluster/in_memory_log_store.h" 19 | 20 | #include "libnuraft/nuraft.hxx" 21 | #include "logger/logger.h" 22 | 23 | #include 24 | #include 25 | 26 | namespace vectordb { 27 | 28 | InmemLogStore::InmemLogStore(VectorDatabase *vector_database) 29 | : start_idx_(vector_database->GetStartIndexId() + 1), 30 | raft_server_bwd_pointer_(nullptr), 31 | disk_emul_delay_(0), 32 | disk_emul_thread_(nullptr), 33 | disk_emul_thread_stop_signal_(false), 34 | disk_emul_last_durable_index_(0), 35 | vector_database_(vector_database) { 36 | // Dummy entry for index 0. 37 | nuraft::ptr buf = nuraft::buffer::alloc(sz_ulong); 38 | logs_[0] = nuraft::cs_new(0, buf); 39 | } 40 | 41 | InmemLogStore::~InmemLogStore() { 42 | if (disk_emul_thread_) { 43 | disk_emul_thread_stop_signal_ = true; 44 | disk_emul_ea_.invoke(); 45 | if (disk_emul_thread_->joinable()) { 46 | disk_emul_thread_->join(); 47 | } 48 | } 49 | } 50 | 51 | auto InmemLogStore::MakeClone(const nuraft::ptr &entry) -> nuraft::ptr { 52 | // NOTE: 53 | // Timestamp is used only when `replicate_log_timestamp_` option is on. 54 | // Otherwise, log store does not need to store or load it. 55 | nuraft::ptr clone = nuraft::cs_new( 56 | entry->get_term(), nuraft::buffer::clone(entry->get_buf()), entry->get_val_type(), entry->get_timestamp()); 57 | return clone; 58 | } 59 | 60 | auto InmemLogStore::next_slot() const -> nuraft::ulong { 61 | std::lock_guard l(logs_lock_); 62 | // Exclude the dummy entry. 
63 | return start_idx_ + logs_.size() - 1; 64 | } 65 | 66 | auto InmemLogStore::start_index() const -> nuraft::ulong { return start_idx_; } 67 | 68 | auto InmemLogStore::last_entry() const -> nuraft::ptr { 69 | ulong next_idx = next_slot(); 70 | std::lock_guard l(logs_lock_); 71 | auto entry = logs_.find(next_idx - 1); 72 | if (entry == logs_.end()) { 73 | entry = logs_.find(0); 74 | } 75 | 76 | return MakeClone(entry->second); 77 | } 78 | 79 | auto InmemLogStore::append(nuraft::ptr &entry) -> nuraft::ulong { 80 | nuraft::ptr clone = MakeClone(entry); 81 | 82 | std::lock_guard l(logs_lock_); 83 | size_t idx = start_idx_ + logs_.size() - 1; 84 | logs_[idx] = clone; 85 | if (entry->get_val_type() == nuraft::log_val_type::app_log) { 86 | nuraft::buffer &data = clone->get_buf(); 87 | std::string content(reinterpret_cast(data.data() + data.pos() + sizeof(int)), 88 | data.size() - sizeof(int)); 89 | 90 | global_logger->debug("Append app logs {}, content: {}, value type {}", idx, content, 91 | "nuraft::log_val_type::app_log"); // 添加打印日志 92 | 93 | vector_database_->WriteWalLogWithId(idx, content); 94 | } else { 95 | nuraft::buffer &data = clone->get_buf(); 96 | std::string content(reinterpret_cast(data.data() + data.pos()), data.size()); 97 | global_logger->debug("Append other logs {}, content: {}, value type {}", idx, content, 98 | static_cast(entry->get_val_type())); // 添加打印日志 99 | } 100 | if (disk_emul_delay_ != 0U) { 101 | uint64_t cur_time = nuraft::timer_helper::get_timeofday_us(); 102 | disk_emul_logs_being_written_[cur_time + disk_emul_delay_ * 1000] = idx; 103 | disk_emul_ea_.invoke(); 104 | } 105 | 106 | return idx; 107 | } 108 | 109 | void InmemLogStore::write_at(ulong index, nuraft::ptr &entry) { 110 | nuraft::ptr clone = MakeClone(entry); 111 | 112 | // Discard all logs equal to or greater than `index. 
113 | std::lock_guard l(logs_lock_); 114 | auto itr = logs_.lower_bound(index); 115 | while (itr != logs_.end()) { 116 | itr = logs_.erase(itr); 117 | } 118 | logs_[index] = clone; 119 | 120 | if (disk_emul_delay_ != 0U) { 121 | uint64_t cur_time = nuraft::timer_helper::get_timeofday_us(); 122 | disk_emul_logs_being_written_[cur_time + disk_emul_delay_ * 1000] = index; 123 | 124 | // Remove entries greater than `index`. 125 | auto entry = disk_emul_logs_being_written_.begin(); 126 | while (entry != disk_emul_logs_being_written_.end()) { 127 | if (entry->second > index) { 128 | entry = disk_emul_logs_being_written_.erase(entry); 129 | } else { 130 | entry++; 131 | } 132 | } 133 | disk_emul_ea_.invoke(); 134 | } 135 | } 136 | 137 | auto InmemLogStore::log_entries(nuraft::ulong start, nuraft::ulong end) 138 | -> nuraft::ptr>> { 139 | nuraft::ptr>> ret = 140 | nuraft::cs_new>>(); 141 | 142 | ret->resize(end - start); 143 | nuraft::ulong cc = 0; 144 | for (nuraft::ulong ii = start; ii < end; ++ii) { 145 | nuraft::ptr src = nullptr; 146 | { 147 | std::lock_guard l(logs_lock_); 148 | auto entry = logs_.find(ii); 149 | if (entry == logs_.end()) { 150 | entry = logs_.find(0); 151 | assert(0); 152 | } 153 | src = entry->second; 154 | } 155 | (*ret)[cc++] = MakeClone(src); 156 | } 157 | return ret; 158 | } 159 | 160 | auto InmemLogStore::log_entries_ext(nuraft::ulong start, nuraft::ulong end, nuraft::int64 batch_size_hint_in_bytes) 161 | -> nuraft::ptr>> { 162 | nuraft::ptr>> ret = 163 | nuraft::cs_new>>(); 164 | 165 | if (batch_size_hint_in_bytes < 0) { 166 | return ret; 167 | } 168 | 169 | size_t accum_size = 0; 170 | for (nuraft::ulong ii = start; ii < end; ++ii) { 171 | nuraft::ptr src = nullptr; 172 | { 173 | std::lock_guard l(logs_lock_); 174 | auto entry = logs_.find(ii); 175 | if (entry == logs_.end()) { 176 | entry = logs_.find(0); 177 | assert(0); 178 | } 179 | src = entry->second; 180 | } 181 | ret->push_back(MakeClone(src)); 182 | accum_size += 
src->get_buf().size(); 183 | if ((batch_size_hint_in_bytes != 0) && accum_size >= static_cast(batch_size_hint_in_bytes)) { 184 | break; 185 | } 186 | } 187 | return ret; 188 | } 189 | 190 | auto InmemLogStore::entry_at(ulong index) -> nuraft::ptr { 191 | nuraft::ptr src = nullptr; 192 | { 193 | std::lock_guard l(logs_lock_); 194 | auto entry = logs_.find(index); 195 | if (entry == logs_.end()) { 196 | entry = logs_.find(0); 197 | } 198 | src = entry->second; 199 | } 200 | return MakeClone(src); 201 | } 202 | 203 | auto InmemLogStore::term_at(ulong index) -> ulong { 204 | ulong term = 0; 205 | { 206 | std::lock_guard l(logs_lock_); 207 | auto entry = logs_.find(index); 208 | if (entry == logs_.end()) { 209 | entry = logs_.find(0); 210 | } 211 | term = entry->second->get_term(); 212 | } 213 | return term; 214 | } 215 | 216 | auto InmemLogStore::pack(nuraft::ulong index, nuraft::int32 cnt) -> nuraft::ptr { 217 | std::vector> logs; 218 | 219 | size_t size_total = 0; 220 | for (ulong ii = index; ii < index + cnt; ++ii) { 221 | nuraft::ptr le = nullptr; 222 | { 223 | std::lock_guard l(logs_lock_); 224 | le = logs_[ii]; 225 | } 226 | assert(le.get()); 227 | nuraft::ptr buf = le->serialize(); 228 | size_total += buf->size(); 229 | logs.push_back(buf); 230 | } 231 | 232 | nuraft::ptr buf_out = 233 | nuraft::buffer::alloc(sizeof(nuraft::int32) + cnt * sizeof(nuraft::int32) + size_total); 234 | buf_out->pos(0); 235 | buf_out->put(cnt); 236 | 237 | for (auto &entry : logs) { 238 | nuraft::ptr &bb = entry; 239 | buf_out->put(static_cast(bb->size())); 240 | buf_out->put(*bb); 241 | } 242 | return buf_out; 243 | } 244 | 245 | void InmemLogStore::apply_pack(nuraft::ulong index, nuraft::buffer &pack) { 246 | pack.pos(0); 247 | nuraft::int32 num_logs = pack.get_int(); 248 | 249 | for (nuraft::int32 ii = 0; ii < num_logs; ++ii) { 250 | ulong cur_idx = index + ii; 251 | nuraft::int32 buf_size = pack.get_int(); 252 | 253 | nuraft::ptr buf_local = nuraft::buffer::alloc(buf_size); 254 | 
pack.get(buf_local); 255 | 256 | nuraft::ptr le = nuraft::log_entry::deserialize(*buf_local); 257 | { 258 | std::lock_guard l(logs_lock_); 259 | logs_[cur_idx] = le; 260 | } 261 | } 262 | 263 | { 264 | std::lock_guard l(logs_lock_); 265 | auto entry = logs_.upper_bound(0); 266 | if (entry != logs_.end()) { 267 | start_idx_ = entry->first; 268 | } else { 269 | start_idx_ = 1; 270 | } 271 | } 272 | } 273 | 274 | auto InmemLogStore::compact(ulong last_log_index) -> bool { 275 | std::lock_guard l(logs_lock_); 276 | for (ulong ii = start_idx_; ii <= last_log_index; ++ii) { 277 | auto entry = logs_.find(ii); 278 | if (entry != logs_.end()) { 279 | logs_.erase(entry); 280 | } 281 | } 282 | 283 | // WARNING: 284 | // Even though nothing has been erased, 285 | // we should set `start_idx_` to new index. 286 | if (start_idx_ <= last_log_index) { 287 | start_idx_ = last_log_index + 1; 288 | } 289 | return true; 290 | } 291 | 292 | auto InmemLogStore::flush() -> bool { 293 | disk_emul_last_durable_index_ = next_slot() - 1; 294 | return true; 295 | } 296 | 297 | void InmemLogStore::Close() {} 298 | 299 | void InmemLogStore::SetDiskDelay(nuraft::raft_server *raft, size_t delay_ms) { 300 | disk_emul_delay_ = delay_ms; 301 | raft_server_bwd_pointer_ = raft; 302 | 303 | if (!disk_emul_thread_) { 304 | disk_emul_thread_ = std::make_unique(&InmemLogStore::disk_emul_ea_, this); 305 | } 306 | } 307 | 308 | auto InmemLogStore::last_durable_index() -> ulong { 309 | uint64_t last_log = next_slot() - 1; 310 | if (disk_emul_delay_ == 0U) { 311 | return last_log; 312 | } 313 | 314 | return disk_emul_last_durable_index_; 315 | } 316 | 317 | void InmemLogStore::DiskEmulLoop() { 318 | // This thread mimics async disk writes. 
319 | 320 | size_t next_sleep_us = 100 * 1000; 321 | while (!disk_emul_thread_stop_signal_) { 322 | disk_emul_ea_.wait_us(next_sleep_us); 323 | disk_emul_ea_.reset(); 324 | if (disk_emul_thread_stop_signal_) { 325 | break; 326 | } 327 | 328 | uint64_t cur_time = nuraft::timer_helper::get_timeofday_us(); 329 | next_sleep_us = 100 * 1000; 330 | 331 | bool call_notification = false; 332 | { 333 | std::lock_guard l(logs_lock_); 334 | // Remove all timestamps equal to or smaller than `cur_time`, 335 | // and pick the greatest one among them. 336 | auto entry = disk_emul_logs_being_written_.begin(); 337 | while (entry != disk_emul_logs_being_written_.end()) { 338 | if (entry->first <= cur_time) { 339 | disk_emul_last_durable_index_ = entry->second; 340 | entry = disk_emul_logs_being_written_.erase(entry); 341 | call_notification = true; 342 | } else { 343 | break; 344 | } 345 | } 346 | 347 | entry = disk_emul_logs_being_written_.begin(); 348 | if (entry != disk_emul_logs_being_written_.end()) { 349 | next_sleep_us = entry->first - cur_time; 350 | } 351 | } 352 | 353 | if (call_notification) { 354 | raft_server_bwd_pointer_->notify_log_append_completion(true); 355 | } 356 | } 357 | } 358 | 359 | } // namespace vectordb 360 | -------------------------------------------------------------------------------- /src/cluster/log_state_machine.cpp: -------------------------------------------------------------------------------- 1 | #include "cluster/log_state_machine.h" 2 | #include 3 | #include "common/constants.h" 4 | #include "logger/logger.h" 5 | 6 | namespace vectordb { 7 | 8 | void LogStateMachine::SetVectorDatabase(VectorDatabase *vector_database) { 9 | vector_database_ = vector_database; // 设置 vector_database_ 指针 10 | last_committed_idx_ = vector_database->GetStartIndexId(); 11 | } 12 | 13 | auto LogStateMachine::commit(const nuraft::ulong log_idx, nuraft::buffer &data) -> nuraft::ptr { 14 | std::string content(reinterpret_cast(data.data() + data.pos() + sizeof(int)), 15 | 
data.size() - sizeof(int)); 16 | global_logger->debug("Commit log_idx: {}, content: {}", log_idx, content); // 添加打印日志 17 | 18 | rapidjson::Document json_request; 19 | json_request.Parse(content.c_str()); 20 | uint64_t label = json_request[REQUEST_ID].GetUint64(); 21 | 22 | // Update last committed index number. 23 | last_committed_idx_ = log_idx; 24 | 25 | // 获取请求参数中的索引类型 26 | IndexFactory::IndexType index_type = vector_database_->GetIndexTypeFromRequest(json_request); 27 | 28 | // vector_database_->Upsert(label, json_request, index_type); 29 | vector_database_->Upsert(label, json_request, index_type); 30 | // 在 upsert 调用之后调用 VectorDatabase::writeWALLog 31 | // vector_database_->WriteWalLog("upsert", json_request); 32 | 33 | // Return Raft log number as a return result. 34 | nuraft::ptr ret = nuraft::buffer::alloc(sizeof(log_idx)); 35 | nuraft::buffer_serializer bs(ret); 36 | bs.put_u64(log_idx); 37 | return ret; 38 | } 39 | 40 | auto LogStateMachine::pre_commit(const nuraft::ulong log_idx, nuraft::buffer &data) -> nuraft::ptr { 41 | std::string content(reinterpret_cast(data.data() + data.pos() + sizeof(int)), 42 | data.size() - sizeof(int)); 43 | global_logger->debug("Pre Commit log_idx: {}, content: {}", log_idx, content); // 添加打印日志 44 | return nullptr; 45 | } 46 | 47 | } // namespace vectordb -------------------------------------------------------------------------------- /src/cluster/raft_stuff.cpp: -------------------------------------------------------------------------------- 1 | #include "cluster/raft_stuff.h" 2 | #include "cluster/raft_logger_wrapper.h" 3 | #include "logger/logger.h" 4 | 5 | namespace vectordb { 6 | 7 | RaftStuff::RaftStuff(int node_id, std::string &endpoint, int port, VectorDatabase *vector_database) 8 | : node_id_(node_id), 9 | endpoint_(endpoint), 10 | port_(port), 11 | vector_database_(vector_database) { // 初始化 vector_database_ 指针 12 | Init(); 13 | } 14 | 15 | void RaftStuff::Init() { 16 | smgr_ = nuraft::cs_new(node_id_, endpoint_, 
vector_database_); 17 | sm_ = nuraft::cs_new(); 18 | 19 | // 将 state_machine 对象强制转换为 log_state_machine 对象 20 | nuraft::ptr log_sm = std::dynamic_pointer_cast(sm_); 21 | 22 | log_sm->SetVectorDatabase( 23 | vector_database_); // 将 vector_database_ 参数传递给 log_state_machine 的 setVectorDatabase 函数 24 | 25 | nuraft::asio_service::options asio_opt; 26 | asio_opt.thread_pool_size_ = 1; 27 | 28 | // nuraft::raft_params params; 29 | // params.election_timeout_lower_bound_ = 100000000; // 设置为一个非常大的值 30 | // params.election_timeout_upper_bound_ = 200000000; // 设置为一个非常大的值 31 | 32 | // Raft parameters. 33 | nuraft::raft_params params; 34 | #if defined(WIN32) || defined(_WIN32) 35 | // heartbeat: 1 sec, election timeout: 2 - 4 sec. 36 | params.heart_beat_interval_ = 1000; 37 | params.election_timeout_lower_bound_ = 2000; 38 | params.election_timeout_upper_bound_ = 4000; 39 | #else 40 | // heartbeat: 100 ms, election timeout: 200 - 400 ms. 41 | params.heart_beat_interval_ = 100; 42 | params.election_timeout_lower_bound_ = 200; 43 | params.election_timeout_upper_bound_ = 400; 44 | #endif 45 | // Upto 5 logs will be preserved ahead the last snapshot. 46 | params.reserved_log_items_ = 5; 47 | // Snapshot will be created for every 5 log appends. 48 | params.snapshot_distance_ = 5; 49 | // Client timeout: 3000 ms. 50 | params.client_req_timeout_ = 10000; 51 | // According to this method, `append_log` function 52 | // should be handled differently. 53 | params.return_method_ = CALL_TYPE; 54 | 55 | // Logger. 56 | std::string log_file_name = "./srv" + std::to_string(node_id_) + ".log"; 57 | nuraft::ptr log_wrap = nuraft::cs_new(log_file_name); 58 | 59 | raft_instance_ = launcher_.init(sm_, smgr_, log_wrap, port_, asio_opt, params); 60 | 61 | if (!raft_instance_) { 62 | global_logger->error("Failed to initialize launcher (see the message in the log file)"); 63 | log_wrap.reset(); 64 | exit(-1); 65 | } 66 | 67 | // Wait until Raft server is ready (upto 5 seconds). 
68 | const size_t max_try = 100; 69 | global_logger->info("init Raft instance "); 70 | for (size_t ii = 0; ii < max_try; ++ii) { 71 | if (raft_instance_->is_initialized()) { 72 | global_logger->info("done"); 73 | global_logger->debug("RaftStuff initialized with node_id: {}, endpoint: {}, port: {}", node_id_, endpoint_, 74 | port_); // 添加打印日志 75 | return; 76 | } 77 | global_logger->info("."); 78 | fflush(stdout); 79 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 80 | } 81 | global_logger->error("RaftStuff Init FAILED"); 82 | log_wrap.reset(); 83 | exit(-1); 84 | } 85 | 86 | auto RaftStuff::AddSrv(int srv_id, const std::string &srv_endpoint) -> bool { 87 | bool success = false; 88 | nuraft::ptr peer_srv_conf = nuraft::cs_new(srv_id, srv_endpoint); 89 | global_logger->debug("Adding server with srv_id: {}, srv_endpoint: {}", srv_id, srv_endpoint); // 添加打印日志 90 | auto ret = raft_instance_->add_srv(*peer_srv_conf); 91 | 92 | if (!ret->get_accepted()) { 93 | global_logger->error("raft_stuff AddSrv failed"); 94 | return false; 95 | } 96 | 97 | // Wait until it appears in server list. 
98 | const size_t max_try = 40; 99 | for (size_t jj = 0; jj < max_try; ++jj) { 100 | global_logger->info("Wait for add follower."); 101 | std::this_thread::sleep_for(std::chrono::milliseconds(250)); 102 | auto conf = GetSrvConfig(srv_id); 103 | if (conf) { 104 | success = true; 105 | global_logger->info(" Add follower done."); 106 | break; 107 | } 108 | } 109 | return success; 110 | } 111 | 112 | auto RaftStuff::GetSrvConfig(int srv_id) -> nuraft::ptr { 113 | global_logger->debug("get server config with srv_id: {}", srv_id); // 添加打印日志 114 | return raft_instance_->get_srv_config(srv_id); 115 | } 116 | 117 | void RaftStuff::AppendEntries(const std::string &entry) { 118 | if (!raft_instance_ || !raft_instance_->is_leader()) { 119 | // 添加调试日志 120 | if (!raft_instance_) { 121 | global_logger->debug("Cannot append entries: Raft instance is not available"); 122 | } else { 123 | global_logger->debug("Cannot append entries: Current node is not the leader"); 124 | } 125 | return; 126 | } 127 | 128 | // 计算所需的内存大小 129 | size_t total_size = sizeof(int) + entry.size(); 130 | 131 | // 添加调试日志 132 | global_logger->debug("Total size of entry: {}", total_size); 133 | 134 | // 创建一个 Raft 日志条目 135 | nuraft::ptr log_entry_buffer = nuraft::buffer::alloc(total_size); 136 | nuraft::buffer_serializer bs_log(log_entry_buffer); 137 | 138 | bs_log.put_str(entry); 139 | 140 | // 添加调试日志 141 | global_logger->debug("Created log_entry_buffer at address: {}", static_cast(log_entry_buffer.get())); 142 | 143 | // 添加调试日志 144 | global_logger->debug("Appending entry to Raft instance"); 145 | 146 | // 将日志条目追加到 Raft 实例中 147 | auto ret = raft_instance_->append_entries({log_entry_buffer}); 148 | 149 | if (!ret->get_accepted()) { 150 | // Log append rejected, usually because this node is not a leader. 151 | global_logger->debug("Failed append log {}", static_cast(ret->get_result_code())); 152 | } 153 | // Log append accepted, but that doesn't mean the log is committed. 
154 | // Commit result can be obtained below. 155 | 156 | if (CALL_TYPE == nuraft::raft_params::blocking) { 157 | // Blocking mode: 158 | // `append_entries` returns after getting a consensus, 159 | // so that `ret` already has the result from state machine. 160 | HandleResult(*ret); 161 | 162 | } else if (CALL_TYPE == nuraft::raft_params::async_handler) { 163 | // Async mode: 164 | // `append_entries` returns immediately. 165 | // `handle_result` will be invoked asynchronously, 166 | // after getting a consensus. 167 | ret->when_ready(std::bind(&RaftStuff::HandleResult, this, std::placeholders::_1)); 168 | } else { 169 | assert(0); 170 | } 171 | } 172 | 173 | void RaftStuff::EnableElectionTimeout(int lower_bound, int upper_bound) { 174 | if (raft_instance_) { 175 | nuraft::raft_params params = raft_instance_->get_current_params(); 176 | params.election_timeout_lower_bound_ = lower_bound; 177 | params.election_timeout_upper_bound_ = upper_bound; 178 | raft_instance_->update_params(params); 179 | } 180 | } 181 | 182 | auto RaftStuff::IsLeader() const -> bool { 183 | if (!raft_instance_) { 184 | return false; 185 | } 186 | return raft_instance_->is_leader(); // 调用 raft_instance_ 的 is_leader() 方法 187 | } 188 | 189 | auto RaftStuff::GetAllNodesInfo() const 190 | -> std::vector> { 191 | std::vector> nodes_info; 192 | 193 | if (!raft_instance_) { 194 | global_logger->warn("raft_instance empty"); 195 | return nodes_info; 196 | } 197 | 198 | // 获取配置信息 199 | 200 | // get_srv_config_all 201 | // auto config = raft_instance_->get_config(); 202 | // if (!config) { 203 | // return nodes_info; 204 | // } 205 | 206 | // 获取服务器列表 207 | // auto servers = config->get_servers(); 208 | 209 | std::vector> configs; 210 | raft_instance_->get_srv_config_all(configs); 211 | 212 | int leader_id = raft_instance_->get_leader(); 213 | for (auto &entry : configs) { 214 | nuraft::ptr &srv = entry; 215 | // 获取节点状态 216 | std::string node_state; 217 | if (srv->get_id() == leader_id) { 218 | 
node_state = "leader"; 219 | } else { 220 | node_state = "follower"; 221 | } 222 | 223 | // 使用正确的类型 224 | nuraft::raft_server::peer_info node_info = raft_instance_->get_peer_info(srv->get_id()); 225 | nuraft::ulong last_log_idx = node_info.last_log_idx_; 226 | nuraft::ulong last_succ_resp_us = node_info.last_succ_resp_us_; 227 | 228 | nodes_info.emplace_back( 229 | std::make_tuple(srv->get_id(), srv->get_endpoint(), node_state, last_log_idx, last_succ_resp_us)); 230 | } 231 | return nodes_info; 232 | } 233 | 234 | auto RaftStuff::GetCurrentNodesInfo() const -> std::tuple { 235 | std::tuple nodes_info; 236 | 237 | if (!raft_instance_) { 238 | return nodes_info; 239 | } 240 | 241 | // 获取配置信息 242 | auto config = raft_instance_->get_config(); 243 | if (!config) { 244 | return nodes_info; 245 | } 246 | 247 | // 获取服务器列表 248 | auto servers = config->get_servers(); 249 | 250 | for (const auto &srv : servers) { 251 | if (srv && srv->get_id() == node_id_) { 252 | // 获取节点状态 253 | std::string node_state; 254 | if (srv->get_id() == raft_instance_->get_leader()) { 255 | node_state = "leader"; 256 | } else { 257 | node_state = "follower"; 258 | } 259 | 260 | // 使用正确的类型 261 | nuraft::raft_server::peer_info node_info = raft_instance_->get_peer_info(srv->get_id()); 262 | nuraft::ulong last_log_idx = node_info.last_log_idx_; 263 | nuraft::ulong last_succ_resp_us = node_info.last_succ_resp_us_; 264 | nodes_info = std::make_tuple(srv->get_id(), srv->get_endpoint(), node_state, last_log_idx, last_succ_resp_us); 265 | break; 266 | } 267 | } 268 | 269 | return nodes_info; 270 | } 271 | 272 | void RaftStuff::HandleResult(nuraft::cmd_result> &result) { 273 | if (result.get_result_code() != nuraft::cmd_result_code::OK) { 274 | // Something went wrong. 275 | // This means committing this log failed, 276 | // but the log itself is still in the log store. 
277 | global_logger->error("failed: {}", static_cast(result.get_result_code())); 278 | return; 279 | } 280 | global_logger->info("succeed"); 281 | } 282 | 283 | } // namespace vectordb -------------------------------------------------------------------------------- /src/common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( 2 | vectorDB_common 3 | OBJECT 4 | vector_cfg.cpp 5 | proxy_cfg.cpp 6 | master_cfg.cpp 7 | vector_init.cpp 8 | ) 9 | 10 | set(ALL_OBJECT_FILES 11 | ${ALL_OBJECT_FILES} $ 12 | PARENT_SCOPE) -------------------------------------------------------------------------------- /src/common/master_cfg.cpp: -------------------------------------------------------------------------------- 1 | #include "common/master_cfg.h" 2 | #include 3 | #include 4 | #include "common/proxy_cfg.h" 5 | 6 | // example master_config: 7 | // { 8 | // "MASTER_HOST" : "0.0.0.0", 9 | // "MASTER_PORT" : 6060, 10 | // "ETCD_ENDPOINTS" : "http://127.0.0.1:2379", 11 | 12 | // "LOG":{ 13 | // "LOG_NAME" : "my_log", 14 | // "LOG_LEVEL" : 1 15 | // } 16 | // } 17 | 18 | namespace vectordb { 19 | 20 | std::string MasterCfg::cfg_path{}; 21 | 22 | void MasterCfg::ParseCfgFile(const std::string &path) { 23 | std::ifstream config_file(path); 24 | 25 | rapidjson::IStreamWrapper isw(config_file); 26 | 27 | rapidjson::Document data; 28 | 29 | data.ParseStream(isw); 30 | 31 | assert(data.IsObject()); 32 | 33 | if (data.HasMember("MASTER_HOST") && data["MASTER_HOST"].IsString()) { 34 | master_host_ = data["MASTER_HOST"].GetString(); 35 | } else { 36 | std::cout << "MASTER_HOST fault" << std::endl; 37 | } 38 | 39 | if (data.HasMember("MASTER_PORT") && data["MASTER_PORT"].IsInt()) { 40 | master_port_ = data["MASTER_PORT"].GetInt(); 41 | } else { 42 | std::cout << "MASTER_PORT fault" << std::endl; 43 | } 44 | 45 | if (data.HasMember("ETCD_ENDPOINTS") && data["ETCD_ENDPOINTS"].IsString()) { 46 | etcd_endpoints_ = 
data["ETCD_ENDPOINTS"].GetString(); 47 | } else { 48 | std::cout << "ETCD_ENDPOINTS fault" << std::endl; 49 | } 50 | 51 | if (data.HasMember("LOG") && data["LOG"].IsObject()) { 52 | if (data["LOG"].HasMember("LOG_NAME") && data["LOG"]["LOG_NAME"].IsString()) { 53 | m_log_cfg_.m_glog_name_ = data["LOG"]["LOG_NAME"].GetString(); 54 | } else { 55 | std::cout << "LOG_NAME fault" << std::endl; 56 | } 57 | 58 | if (data["LOG"].HasMember("LOG_LEVEL") && data["LOG"]["LOG_LEVEL"].IsInt()) { 59 | m_log_cfg_.m_level_ = static_cast(data["LOG"]["LOG_LEVEL"].GetInt()); 60 | } else { 61 | std::cout << "LOG_LEVEL fault" << std::endl; 62 | } 63 | 64 | } else { 65 | std::cout << "LOG fault" << std::endl; 66 | } 67 | } 68 | 69 | } // namespace vectordb -------------------------------------------------------------------------------- /src/common/proxy_cfg.cpp: -------------------------------------------------------------------------------- 1 | #include "common/proxy_cfg.h" 2 | #include 3 | #include 4 | 5 | // example proxy_config: 6 | // { 7 | // "INSTANCE_ID" : 1, 8 | // "PROXY_ADDRESS" : "0.0.0.0", 9 | // "PROXY_PORT" : 80, 10 | 11 | // "MASTER_HOST" : "127.0.0.1", 12 | // "MASTER_PORT" : 6060, 13 | 14 | // "READ_PATHS" :[ "/search"], 15 | // "WRITE_PATHS" :[ "/upsert"], 16 | 17 | // "LOG":{ 18 | // "LOG_NAME" : "my_log", 19 | // "LOG_LEVEL" : 1 20 | // } 21 | // } 22 | 23 | namespace vectordb { 24 | 25 | std::string ProxyCfg::cfg_path{}; 26 | 27 | void ProxyCfg::ParseCfgFile(const std::string &path) { 28 | std::ifstream config_file(path); 29 | 30 | rapidjson::IStreamWrapper isw(config_file); 31 | 32 | rapidjson::Document data; 33 | 34 | data.ParseStream(isw); 35 | 36 | assert(data.IsObject()); 37 | 38 | if (data.HasMember("INSTANCE_ID") && data["INSTANCE_ID"].IsInt()) { 39 | instance_id_ = data["INSTANCE_ID"].GetInt(); 40 | } else { 41 | std::cout << "INSTANCE_ID fault" << std::endl; 42 | } 43 | 44 | if (data.HasMember("PROXY_ADDRESS") && data["PROXY_ADDRESS"].IsString()) { 45 | 
proxy_host_ = data["PROXY_ADDRESS"].GetString(); 46 | } else { 47 | std::cout << "PROXY_ADDRESS fault" << std::endl; 48 | } 49 | 50 | if (data.HasMember("PROXY_PORT") && data["PROXY_PORT"].IsInt()) { 51 | proxy_port_ = data["PROXY_PORT"].GetInt(); 52 | } else { 53 | std::cout << "PROXY_PORT fault" << std::endl; 54 | } 55 | 56 | if (data.HasMember("MASTER_HOST") && data["MASTER_HOST"].IsString()) { 57 | master_host_ = data["MASTER_HOST"].GetString(); 58 | } else { 59 | std::cout << "MASTER_HOST fault" << std::endl; 60 | } 61 | 62 | if (data.HasMember("MASTER_PORT") && data["MASTER_PORT"].IsInt()) { 63 | master_port_ = data["MASTER_PORT"].GetInt(); 64 | } else { 65 | std::cout << "MASTER_PORT fault" << std::endl; 66 | } 67 | 68 | if (data.HasMember("READ_PATHS") && data["READ_PATHS"].IsArray()) { 69 | for (auto &read_path : data["READ_PATHS"].GetArray()) { 70 | if (read_path.IsString()) { 71 | read_paths_.emplace(read_path.GetString()); 72 | } else { 73 | std::cout << "READ_PATH fault" << std::endl; 74 | } 75 | } 76 | } else { 77 | std::cout << "READ_PATHS fault" << std::endl; 78 | } 79 | 80 | if (data.HasMember("WRITE_PATHS") && data["WRITE_PATHS"].IsArray()) { 81 | for (auto &write_path : data["WRITE_PATHS"].GetArray()) { 82 | if (write_path.IsString()) { 83 | write_paths_.emplace(write_path.GetString()); 84 | } else { 85 | std::cout << "WRITE_PATH fault" << std::endl; 86 | } 87 | } 88 | } else { 89 | std::cout << "WRITE_PATHS fault" << std::endl; 90 | } 91 | 92 | if (data.HasMember("LOG") && data["LOG"].IsObject()) { 93 | if (data["LOG"].HasMember("LOG_NAME") && data["LOG"]["LOG_NAME"].IsString()) { 94 | m_log_cfg_.m_glog_name_ = data["LOG"]["LOG_NAME"].GetString(); 95 | } else { 96 | std::cout << "LOG_NAME fault" << std::endl; 97 | } 98 | 99 | if (data["LOG"].HasMember("LOG_LEVEL") && data["LOG"]["LOG_LEVEL"].IsInt()) { 100 | m_log_cfg_.m_level_ = static_cast(data["LOG"]["LOG_LEVEL"].GetInt()); 101 | } else { 102 | std::cout << "LOG_LEVEL fault" << std::endl; 103 
| } 104 | 105 | } else { 106 | std::cout << "LOG fault" << std::endl; 107 | } 108 | } 109 | 110 | } // namespace vectordb -------------------------------------------------------------------------------- /src/common/vector_cfg.cpp: -------------------------------------------------------------------------------- 1 | #include "common/vector_cfg.h" 2 | #include 3 | #include 4 | 5 | // example vectordb_config: 6 | 7 | // { 8 | // "CLUSTER_INFO" :[ 9 | // { 10 | // "RAFT":{ 11 | // "NODE_ID":1, 12 | // "ENDPOINT":"127.0.0.1:8081", 13 | // "PORT":8081 14 | // }, 15 | // "ROCKS_DB_PATH" : "/home/zhouzj/vectordb1/storage", 16 | // "WAL_PATH" : "/home/zhouzj/vectordb1/wal", 17 | // "SNAP_PATH" : "/home/zhouzj/vectordb1/snap/", 18 | // "ADDRESS" : "0.0.0.0", 19 | // "PORT" : 7781 20 | // }, 21 | // { 22 | // "RAFT":{ 23 | // "NODE_ID":2, 24 | // "ENDPOINT":"127.0.0.1:8082", 25 | // "PORT":8082 26 | // }, 27 | // "ROCKS_DB_PATH" : "/home/zhouzj/vectordb2/storage", 28 | // "WAL_PATH" : "/home/zhouzj/vectordb2/wal", 29 | // "SNAP_PATH" : "/home/zhouzj/vectordb2/snap/", 30 | // "ADDRESS" : "0.0.0.0", 31 | // "PORT" : 7782 32 | 33 | // } 34 | // ], 35 | // "LOG":{ 36 | // "LOG_NAME" : "my_log", 37 | // "LOG_LEVEL" : 1 38 | // }, 39 | // "TEST_ROCKS_DB_PATH" : "/home/zhouzj/test_vectordb/storage", 40 | // "TEST_WAL_PATH" : "/home/zhouzj/test_vectordb/wal", 41 | // "TEST_SNAP_PATH" : "/home/zhouzj/test_vectordb/snap/" 42 | 43 | // } 44 | 45 | // vdb_server use these: 46 | // "CLUSTER_INFO" :[ 47 | // { 48 | // "RAFT":{ 49 | // "NODE_ID":1, 50 | // "ENDPOINT":"127.0.0.1:8081", 51 | // "PORT":8081 52 | // }, 53 | // "ROCKS_DB_PATH" : "/home/zhouzj/vectordb1/storage", 54 | // "WAL_PATH" : "/home/zhouzj/vectordb1/wal", 55 | // "SNAP_PATH" : "/home/zhouzj/vectordb1/snap/", 56 | // "ADDRESS" : "0.0.0.0", 57 | // "PORT" : 7781 58 | // }, 59 | // { 60 | // "RAFT":{ 61 | // "NODE_ID":2, 62 | // "ENDPOINT":"127.0.0.1:8082", 63 | // "PORT":8082 64 | // }, 65 | // "ROCKS_DB_PATH" : 
"/home/zhouzj/vectordb2/storage", 66 | // "WAL_PATH" : "/home/zhouzj/vectordb2/wal", 67 | // "SNAP_PATH" : "/home/zhouzj/vectordb2/snap/", 68 | // "ADDRESS" : "0.0.0.0", 69 | // "PORT" : 7782 70 | 71 | // } 72 | // ], 73 | 74 | // gtest use these: 75 | // "TEST_ROCKS_DB_PATH" : "/home/zhouzj/test_vectordb/storage", 76 | // "TEST_WAL_PATH" : "/home/zhouzj/test_vectordb/wal", 77 | // "TEST_SNAP_PATH" : "/home/zhouzj/test_vectordb/snap/", 78 | 79 | namespace vectordb { 80 | 81 | std::string Cfg::cfg_path{}; 82 | int Cfg::node_id{0}; 83 | 84 | void Cfg::ParseCfgFile(const std::string &path,const int &node_id) { 85 | std::ifstream config_file(path); 86 | 87 | rapidjson::IStreamWrapper isw(config_file); 88 | 89 | rapidjson::Document data; 90 | 91 | data.ParseStream(isw); 92 | 93 | assert(data.IsObject()); 94 | 95 | if (data.HasMember("CLUSTER_INFO") && data["CLUSTER_INFO"].IsArray()) { 96 | for (auto &node_cfg : data["CLUSTER_INFO"].GetArray()) { 97 | if (node_cfg.HasMember("RAFT") && node_cfg["RAFT"].IsObject()) { 98 | if (node_cfg["RAFT"].HasMember("NODE_ID") && node_cfg["RAFT"]["NODE_ID"].IsInt()) { 99 | int raft_node_id = node_cfg["RAFT"]["NODE_ID"].GetInt(); 100 | if (raft_node_id == node_id) { 101 | if (node_cfg.HasMember("ROCKS_DB_PATH") && node_cfg["ROCKS_DB_PATH"].IsString()) { 102 | m_rocks_db_path_ = node_cfg["ROCKS_DB_PATH"].GetString(); 103 | } else { 104 | std::cout << "ROCKS_DB_PATH fault" << std::endl; 105 | } 106 | 107 | if (node_cfg.HasMember("WAL_PATH") && node_cfg["WAL_PATH"].IsString()) { 108 | wal_path_ = node_cfg["WAL_PATH"].GetString(); 109 | } else { 110 | std::cout << "WAL_PATH fault" << std::endl; 111 | } 112 | if (node_cfg.HasMember("SNAP_PATH") && node_cfg["SNAP_PATH"].IsString()) { 113 | snap_path_ = node_cfg["SNAP_PATH"].GetString(); 114 | } else { 115 | std::cout << "SNAP_PATH fault" << std::endl; 116 | } 117 | 118 | 119 | if (node_cfg["RAFT"].HasMember("NODE_ID") && node_cfg["RAFT"]["NODE_ID"].IsInt()) { 120 | raft_cfg_.node_id_ = 
node_cfg["RAFT"]["NODE_ID"].GetInt(); 121 | } else { 122 | std::cout << "NODE_ID fault" << std::endl; 123 | } 124 | 125 | if (node_cfg["RAFT"].HasMember("ENDPOINT") && node_cfg["RAFT"]["ENDPOINT"].IsString()) { 126 | raft_cfg_.endpoint_ = node_cfg["RAFT"]["ENDPOINT"].GetString(); 127 | } else { 128 | std::cout << "ENDPOINT fault" << std::endl; 129 | } 130 | if (node_cfg["RAFT"].HasMember("PORT") && node_cfg["RAFT"]["PORT"].IsInt()) { 131 | raft_cfg_.port_ = node_cfg["RAFT"]["PORT"].GetInt(); 132 | } else { 133 | std::cout << "PORT fault" << std::endl; 134 | } 135 | 136 | 137 | 138 | if (node_cfg.HasMember("PORT") && node_cfg["PORT"].IsInt()) { 139 | port_ = node_cfg["PORT"].GetInt(); 140 | } else { 141 | std::cout << "PORT fault" << std::endl; 142 | } 143 | 144 | if (node_cfg.HasMember("ADDRESS") && node_cfg["ADDRESS"].IsString()) { 145 | address_ = node_cfg["ADDRESS"].GetString(); 146 | } else { 147 | std::cout << "ADDRESS fault" << std::endl; 148 | } 149 | } 150 | } else { 151 | std::cout << "NODE_ID fault" << std::endl; 152 | } 153 | } else { 154 | std::cout << "RAFT fault" << std::endl; 155 | } 156 | } 157 | } 158 | 159 | if (data.HasMember("TEST_ROCKS_DB_PATH") && data["TEST_ROCKS_DB_PATH"].IsString()) { 160 | test_rocks_db_path_ = data["TEST_ROCKS_DB_PATH"].GetString(); 161 | } else { 162 | std::cout << "TEST_ROCKS_DB_PATH fault" << std::endl; 163 | } 164 | 165 | if (data.HasMember("TEST_WAL_PATH") && data["TEST_WAL_PATH"].IsString()) { 166 | test_wal_path_ = data["TEST_WAL_PATH"].GetString(); 167 | } else { 168 | std::cout << "TEST_WAL_PATH fault" << std::endl; 169 | } 170 | if (data.HasMember("TEST_SNAP_PATH") && data["TEST_SNAP_PATH"].IsString()) { 171 | test_snap_path_ = data["TEST_SNAP_PATH"].GetString(); 172 | } else { 173 | std::cout << "TEST_SNAP_PATH fault" << std::endl; 174 | } 175 | 176 | if (data.HasMember("LOG") && data["LOG"].IsObject()) { 177 | if (data["LOG"].HasMember("LOG_NAME") && data["LOG"]["LOG_NAME"].IsString()) { 178 | 
m_log_cfg_.m_glog_name_ = data["LOG"]["LOG_NAME"].GetString(); 179 | } else { 180 | std::cout << "LOG_NAME fault" << std::endl; 181 | } 182 | 183 | if (data["LOG"].HasMember("LOG_LEVEL") && data["LOG"]["LOG_LEVEL"].IsInt()) { 184 | m_log_cfg_.m_level_ = static_cast(data["LOG"]["LOG_LEVEL"].GetInt()); 185 | } else { 186 | std::cout << "LOG_LEVEL fault" << std::endl; 187 | } 188 | 189 | } else { 190 | std::cout << "LOG fault" << std::endl; 191 | } 192 | } 193 | 194 | } // namespace vectordb -------------------------------------------------------------------------------- /src/common/vector_init.cpp: -------------------------------------------------------------------------------- 1 | #include "common/vector_init.h" 2 | #include "common/master_cfg.h" 3 | #include "common/proxy_cfg.h" 4 | #include "common/vector_cfg.h" 5 | #include "index/index_factory.h" 6 | #include "logger/logger.h" 7 | #include "database/persistence.h" 8 | namespace vectordb { 9 | void VdbServerInit(int node_id) { 10 | auto cfg_path = GetCfgPath("vectordb_config"); 11 | Cfg::SetCfg(cfg_path,node_id); 12 | InitGlobalLogger(Cfg::Instance().GlogName()); 13 | SetLogLevel(Cfg::Instance().GlogLevel()); 14 | auto &indexfactory = IndexFactory::Instance(); 15 | int dim = 1; // 向量维度 16 | indexfactory.Init(IndexFactory::IndexType::FLAT, dim, 100); 17 | indexfactory.Init(IndexFactory::IndexType::HNSW, dim, 100); 18 | indexfactory.Init(IndexFactory::IndexType::FILTER, dim, 100); 19 | } 20 | 21 | 22 | void ProxyServerInit() { 23 | auto cfg_path = GetCfgPath("proxy_config"); 24 | ProxyCfg::SetCfg(cfg_path); 25 | InitGlobalLogger(ProxyCfg::Instance().GlogName()); 26 | SetLogLevel(ProxyCfg::Instance().GlogLevel()); 27 | } 28 | 29 | void MasterServerInit() { 30 | auto cfg_path = GetCfgPath("master_config"); 31 | MasterCfg::SetCfg(cfg_path); 32 | InitGlobalLogger(MasterCfg::Instance().GlogName()); 33 | SetLogLevel(MasterCfg::Instance().GlogLevel()); 34 | } 35 | 36 | } // namespace vectordb 
-------------------------------------------------------------------------------- /src/database/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( 2 | vectorDB_database 3 | OBJECT 4 | scalar_storage.cpp 5 | vector_database.cpp 6 | persistence.cpp 7 | ) 8 | 9 | set(ALL_OBJECT_FILES 10 | ${ALL_OBJECT_FILES} $ 11 | PARENT_SCOPE) -------------------------------------------------------------------------------- /src/database/persistence.cpp: -------------------------------------------------------------------------------- 1 | #include "database/persistence.h" 2 | #include // 包含 以使用 rapidjson::Document 类型 3 | #include // 包含 rapidjson/stringbuffer.h 以使用 StringBuffer 类 4 | #include // 包含 rapidjson/writer.h 以使用 Writer 类 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "common/vector_utils.h" 11 | #include "logger/logger.h" 12 | namespace vectordb { 13 | 14 | Persistence::Persistence() : increase_id_(10), last_snapshot_id_(0) {} 15 | 16 | Persistence::~Persistence() { 17 | if (wal_log_file_.is_open()) { 18 | wal_log_file_.close(); 19 | } 20 | } 21 | 22 | void Persistence::Init(const std::string &local_path) { 23 | if (!std::filesystem::exists(local_path)) { 24 | // 文件不存在,先创建文件 25 | std::ofstream temp_file(local_path); 26 | temp_file.close(); 27 | } 28 | 29 | wal_log_file_.open(local_path, std::ios::in | std::ios::out | 30 | std::ios::app); // 以 std::ios::in | std::ios::out | std::ios::app 模式打开文件 31 | if (!wal_log_file_.is_open()) { 32 | global_logger->error("An error occurred while writing the WAL log entry. 
Reason: {}", 33 | std::strerror(errno)); // 使用日志打印错误消息和原因 34 | throw std::runtime_error("Failed to open WAL log file at path: " + local_path); 35 | } 36 | 37 | LoadLastSnapshotId(Cfg::Instance().SnapPath()); 38 | } 39 | 40 | auto Persistence::IncreaseId() -> uint64_t { 41 | increase_id_++; 42 | return increase_id_; 43 | } 44 | 45 | auto Persistence::GetId() const -> uint64_t { return increase_id_; } 46 | 47 | void Persistence::WriteWalLog(const std::string &operation_type, const rapidjson::Document &json_data, 48 | const std::string &version) { // 添加 version 参数 49 | uint64_t log_id = IncreaseId(); 50 | 51 | rapidjson::StringBuffer buffer; 52 | rapidjson::Writer writer(buffer); 53 | json_data.Accept(writer); 54 | 55 | std::string json_data_str = buffer.GetString(); 56 | // 拼接日志条目 57 | std::ostringstream oss; 58 | oss << log_id << "|" << version << "|" << operation_type << "|" << json_data_str; 59 | 60 | // 压缩日志条目 61 | std::string compressed_data; 62 | snappy::Compress(oss.str().c_str(), oss.str().size(), &compressed_data); 63 | 64 | // 写入压缩后的日志条目到文件 65 | wal_log_file_ << compressed_data << std::endl; 66 | 67 | if (wal_log_file_.fail()) { // 检查是否发生错误 68 | global_logger->error("An error occurred while writing the WAL log entry. 
Reason: {}", 69 | std::strerror(errno)); // 使用日志打印错误消息和原因 70 | } else { 71 | global_logger->debug("Wrote WAL log entry: log_id={}, version={}, operation_type={}, json_data_str={}", log_id, 72 | version, operation_type, buffer.GetString()); // 打印日志 73 | wal_log_file_.flush(); // 强制持久化 74 | } 75 | } 76 | 77 | void Persistence::WriteWalRawLog(uint64_t log_id, const std::string &operation_type, const std::string &raw_data, 78 | const std::string &version) { 79 | // 拼接日志条目 80 | std::ostringstream oss; 81 | oss << log_id << "|" << version << "|" << operation_type << "|" << raw_data; 82 | 83 | // 压缩日志条目 84 | std::string compressed_data; 85 | snappy::Compress(oss.str().c_str(), oss.str().size(), &compressed_data); 86 | 87 | // 写入压缩后的日志条目到文件 88 | wal_log_file_ << compressed_data << std::endl; 89 | 90 | if (wal_log_file_.fail()) { // 检查是否发生错误 91 | global_logger->error("An error occurred while writing the WAL raw log entry. Reason: {}", 92 | std::strerror(errno)); // 使用日志打印错误消息和原因 93 | } else { 94 | global_logger->debug("Wrote WAL raw log entry: log_id={}, version={}, operation_type={}, raw_data={}", log_id, 95 | version, operation_type, raw_data); // 打印日志 96 | wal_log_file_.flush(); // 强制持久化 97 | } 98 | } 99 | 100 | void Persistence::ReadNextWalLog(std::string *operation_type, rapidjson::Document *json_data) { 101 | global_logger->debug("Reading next WAL log entry"); 102 | 103 | std::string compressed_line; 104 | while (std::getline(wal_log_file_, compressed_line)) { 105 | std::string decompressed_data; 106 | if (!snappy::Uncompress(compressed_line.c_str(), compressed_line.size(), &decompressed_data)) { 107 | global_logger->error("Failed to decompress WAL log entry"); 108 | return; 109 | } 110 | 111 | // 解析解压后的日志条目 112 | std::istringstream iss(decompressed_data); 113 | 114 | std::string log_id_str; 115 | std::string version; 116 | std::string json_data_str; 117 | 118 | std::getline(iss, log_id_str, '|'); 119 | std::getline(iss, version, '|'); 120 | std::getline(iss, 
*operation_type, '|'); // 使用指针参数返回 operation_type 121 | std::getline(iss, json_data_str, '|'); 122 | 123 | uint64_t log_id = std::stoull(log_id_str); // 将 log_id_str 转换为 uint64_t 类型 124 | if (log_id > increase_id_) { // 如果 log_id 大于当前 increase_id_ 125 | increase_id_ = log_id; // 更新 increase_id_ 126 | } 127 | 128 | if (log_id > last_snapshot_id_) { 129 | json_data->Parse(json_data_str.c_str()); // 使用指针参数返回 json_data 130 | global_logger->debug("Read WAL log entry: log_id={}, operation_type={}, json_data_str={}", log_id_str, 131 | *operation_type, json_data_str); 132 | return; 133 | } 134 | // TODO(zhouzj): 增加last_snapshot_id_ 前WAL LOG的清除 135 | global_logger->debug("Skip Read WAL log entry: log_id={}, operation_type={}, json_data_str={}", log_id_str, 136 | *operation_type, json_data_str); 137 | } 138 | operation_type->clear(); 139 | wal_log_file_.clear(); 140 | global_logger->debug("No more WAL log entries to read"); 141 | } 142 | 143 | void Persistence::TakeSnapshot() { // 移除 takeSnapshot 方法的参数 144 | global_logger->debug("Taking snapshot"); // 添加调试信息 145 | 146 | last_snapshot_id_ = increase_id_; 147 | std::string snapshot_folder_path = Cfg::Instance().SnapPath(); 148 | auto &index_factory = IndexFactory::Instance(); // 通过全局指针获取 IndexFactory 实例 149 | index_factory.SaveIndex(snapshot_folder_path); 150 | SaveLastSnapshotId(snapshot_folder_path); 151 | } 152 | 153 | void Persistence::LoadSnapshot() { // 添加 loadSnapshot 方法实现 154 | global_logger->debug("Loading snapshot"); // 添加调试信息 155 | auto &index_factory = IndexFactory::Instance(); 156 | std::string snapshot_folder_path = Cfg::Instance().SnapPath(); 157 | index_factory.LoadIndex(snapshot_folder_path); // 将 scalar_storage 传递给 loadIndex 方法 158 | } 159 | 160 | void Persistence::SaveLastSnapshotId(const std::string &folder_path) { // 添加 saveLastSnapshotID 方法实现 161 | std::string file_path = folder_path + "MaxLogID"; 162 | std::ofstream file("file_path"); 163 | if (file.is_open()) { 164 | file << last_snapshot_id_; 165 | 
file.close(); 166 | } else { 167 | global_logger->error("Failed to open file snapshots_MaxID for writing"); 168 | } 169 | global_logger->debug("save snapshot Max log ID {}", last_snapshot_id_); // 添加调试信息 170 | } 171 | 172 | void Persistence::LoadLastSnapshotId(const std::string &folder_path) { // 添加 loadLastSnapshotID 方法实现 173 | std::string file_path = folder_path + ".MaxLogID"; 174 | std::ifstream file("file_path"); 175 | if (file.is_open()) { 176 | file >> last_snapshot_id_; 177 | file.close(); 178 | } else { 179 | global_logger->warn("Failed to open file snapshots_MaxID for reading"); 180 | } 181 | 182 | global_logger->debug("Loading snapshot Max log ID {}", last_snapshot_id_); // 添加调试信息 183 | } 184 | 185 | } // namespace vectordb -------------------------------------------------------------------------------- /src/database/scalar_storage.cpp: -------------------------------------------------------------------------------- 1 | #include "database/scalar_storage.h" 2 | #include "logger/logger.h" 3 | #include 4 | #include 5 | #include // 包含rapidjson/stringbuffer.h头文件 6 | #include 7 | #include 8 | namespace vectordb { 9 | 10 | ScalarStorage::ScalarStorage(const std::string& db_path) { 11 | rocksdb::Options options; 12 | options.create_if_missing = true; 13 | rocksdb::Status status = rocksdb::DB::Open(options, db_path, &db_); 14 | if (!status.ok()) { 15 | throw std::runtime_error("Failed to open RocksDB: " + status.ToString()); 16 | } 17 | } 18 | 19 | ScalarStorage::~ScalarStorage() { 20 | delete db_; 21 | } 22 | 23 | void ScalarStorage::InsertScalar(uint64_t id, const rapidjson::Document& data) { // 将参数类型更改为rapidjson::Document 24 | rapidjson::StringBuffer buffer; 25 | rapidjson::Writer writer(buffer); 26 | data.Accept(writer); 27 | std::string value = buffer.GetString(); 28 | 29 | rocksdb::Status status = db_->Put(rocksdb::WriteOptions(), std::to_string(id), value); 30 | if (!status.ok()) { 31 | global_logger->error("Failed to insert scalar: {}", status.ToString()); 
// 使用GlobalLogger打印错误日志 32 | } 33 | } 34 | 35 | auto ScalarStorage::GetScalar(uint64_t id) -> rapidjson::Document { // 将返回类型更改为rapidjson::Document 36 | std::string value; 37 | rocksdb::Status status = db_->Get(rocksdb::ReadOptions(), std::to_string(id), &value); 38 | if (!status.ok()) { 39 | return rapidjson::Document(); // 返回一个空的rapidjson::Document对象 40 | } 41 | 42 | rapidjson::Document data; 43 | data.Parse(value.c_str()); 44 | 45 | // 打印从ScalarStorage获取的数据和rocksdb::Status status 46 | rapidjson::StringBuffer buffer; 47 | rapidjson::Writer writer(buffer); 48 | data.Accept(writer); 49 | global_logger->debug("Data retrieved from ScalarStorage: {}, RocksDB status: {}", buffer.GetString(), status.ToString()); // 添加rocksdb::Status status 50 | 51 | return data; 52 | } 53 | } // namespace vectordb -------------------------------------------------------------------------------- /src/database/vector_database.cpp: -------------------------------------------------------------------------------- 1 | #include "database/vector_database.h" 2 | #include 3 | #include 4 | #include 5 | #include "common/constants.h" 6 | #include "database/scalar_storage.h" 7 | #include "index/faiss_index.h" 8 | #include "index/filter_index.h" 9 | #include "index/hnswlib_index.h" 10 | #include "index/index_factory.h" 11 | #include "logger/logger.h" 12 | #include // 包含 rapidjson/stringbuffer.h 以使用 StringBuffer 类 13 | #include // 包含 rapidjson/writer.h 以使用 Writer 类 14 | 15 | namespace vectordb { 16 | 17 | VectorDatabase::VectorDatabase(const std::string &db_path, const std::string& wal_path) : scalar_storage_(db_path) { 18 | persistence_.Init(wal_path); // 初始化 persistence_ 对象 19 | } 20 | 21 | void VectorDatabase::ReloadDatabase() { 22 | global_logger->info("Entering VectorDatabase::reloadDatabase()"); // 在方法开始时打印日志 23 | 24 | persistence_.LoadSnapshot(); 25 | 26 | std::string operation_type; 27 | rapidjson::Document json_data; 28 | persistence_.ReadNextWalLog(&operation_type, &json_data); // 通过指针的方式调用 
readNextWALLog 29 | 30 | while (!operation_type.empty()) { 31 | global_logger->info("Operation Type: {}", operation_type); 32 | 33 | // 打印读取的一行内容fmt::detail::buffer 34 | rapidjson::StringBuffer buffer; 35 | rapidjson::Writer writer(buffer); 36 | json_data.Accept(writer); 37 | global_logger->info("Read Line: {}", buffer.GetString()); 38 | 39 | if (operation_type == "upsert") { 40 | uint64_t id = json_data[REQUEST_ID].GetUint64(); 41 | IndexFactory::IndexType index_type = GetIndexTypeFromRequest(json_data); 42 | 43 | Upsert(id, json_data, index_type); // 调用 VectorDatabase::upsert 接口重建数据 44 | } 45 | 46 | // 清空 json_data 47 | rapidjson::Document().Swap(json_data); 48 | 49 | // 读取下一条 WAL 日志 50 | operation_type.clear(); 51 | persistence_.ReadNextWalLog(&operation_type, &json_data); 52 | } 53 | } 54 | 55 | void VectorDatabase::WriteWalLog(const std::string& operation_type, const rapidjson::Document& json_data) { 56 | std::string version = "1.0"; // 您可以根据需要设置版本 57 | persistence_.WriteWalLog(operation_type, json_data, version); // 将 version 传递给 writeWALLog 方法 58 | } 59 | 60 | void VectorDatabase::WriteWalLogWithId(uint64_t log_id, const std::string& data) { 61 | std::string operation_type = "upsert"; // 默认 operation_type 为 upsert 62 | std::string version = "1.0"; // 您可以根据需要设置版本 63 | persistence_.WriteWalRawLog(log_id, operation_type, data, version); // 调用 persistence_ 的 writeWALRawLog 方法 64 | } 65 | 66 | auto VectorDatabase::GetIndexTypeFromRequest(const rapidjson::Document& json_request) -> IndexFactory::IndexType { 67 | // 获取请求参数中的索引类型 68 | if (json_request.HasMember(REQUEST_INDEX_TYPE) && json_request[REQUEST_INDEX_TYPE].IsString()) { 69 | std::string index_type_str = json_request[REQUEST_INDEX_TYPE].GetString(); 70 | if (index_type_str == INDEX_TYPE_FLAT) { 71 | return IndexFactory::IndexType::FLAT; 72 | } if (index_type_str == INDEX_TYPE_HNSW) { 73 | return IndexFactory::IndexType::HNSW; 74 | } 75 | } 76 | return IndexFactory::IndexType::UNKNOWN; // 返回UNKNOWN值 77 | } 
78 | 79 | void VectorDatabase::Upsert(uint64_t id, const rapidjson::Document &data, 80 | vectordb::IndexFactory::IndexType index_type) { 81 | // 检查标量存储中是否存在给定ID的向量 82 | rapidjson::Document existing_data; // 修改为驼峰命名 83 | try { 84 | existing_data = scalar_storage_.GetScalar(id); 85 | } catch (const std::runtime_error &e) { 86 | // 向量不存在,继续执行插入操作 87 | } 88 | 89 | // 如果存在现有向量,则从索引中删除它 90 | if (existing_data.IsObject()) { // 使用IsObject()检查existingData是否为空 91 | std::vector existing_vector(existing_data["vectors"].Size()); // 从JSON数据中提取vectors字段 92 | for (rapidjson::SizeType i = 0; i < existing_data["vectors"].Size(); ++i) { 93 | existing_vector[i] = existing_data["vectors"][i].GetFloat(); 94 | } 95 | 96 | void *index = IndexFactory::Instance().GetIndex(index_type); 97 | switch (index_type) { 98 | case IndexFactory::IndexType::FLAT: { 99 | auto *faiss_index = static_cast(index); 100 | faiss_index->RemoveVectors({static_cast(id)}); // 将id转换为long类型 101 | break; 102 | } 103 | case IndexFactory::IndexType::HNSW: { 104 | auto *hnsw_index = static_cast(index); 105 | hnsw_index->RemoveVectors({static_cast(id)}); 106 | break; 107 | } 108 | default: 109 | break; 110 | } 111 | } 112 | 113 | // 将新向量插入索引 114 | std::vector new_vector(data["vectors"].Size()); // 从JSON数据中提取vectors字段 115 | for (rapidjson::SizeType i = 0; i < data["vectors"].Size(); ++i) { 116 | new_vector[i] = data["vectors"][i].GetFloat(); 117 | } 118 | 119 | void *index = IndexFactory::Instance().GetIndex(index_type); 120 | switch (index_type) { 121 | case IndexFactory::IndexType::FLAT: { 122 | auto *faiss_index = static_cast(index); 123 | faiss_index->InsertVectors(new_vector, static_cast(id)); 124 | break; 125 | } 126 | case IndexFactory::IndexType::HNSW: { 127 | auto *hnsw_index = static_cast(index); 128 | hnsw_index->InsertVectors(new_vector, static_cast(id)); 129 | break; 130 | } 131 | default: 132 | break; 133 | } 134 | 135 | global_logger->debug("try add new filter"); // 添加打印信息 136 | // 检查客户写入的数据中是否有 int 类型的 
JSON 字段 137 | auto *filter_index = static_cast(IndexFactory::Instance().GetIndex(IndexFactory::IndexType::FILTER)); 138 | for (auto it = data.MemberBegin(); it != data.MemberEnd(); ++it) { 139 | std::string field_name = it->name.GetString(); 140 | global_logger->debug("try filter member {} {}", it->value.IsInt(), field_name); // 添加打印信息 141 | if (it->value.IsInt() && field_name != "id") { // 过滤名称为 "id" 的字段 142 | int64_t field_value = it->value.GetInt64(); 143 | int64_t *old_field_value_p = nullptr; 144 | // 如果存在现有向量,则从 FilterIndex 中更新 int 类型字段 145 | if (existing_data.IsObject()) { 146 | old_field_value_p = static_cast(malloc(sizeof(int64_t))); 147 | *old_field_value_p = existing_data[field_name.c_str()].GetInt64(); 148 | } 149 | filter_index->UpdateIntFieldFilter(field_name, old_field_value_p, field_value, id); 150 | delete old_field_value_p; 151 | } 152 | } 153 | 154 | // 更新标量存储中的向量 155 | scalar_storage_.InsertScalar(id, data); 156 | } 157 | 158 | auto VectorDatabase::Query(uint64_t id) -> rapidjson::Document { // 添加query函数实现 159 | return scalar_storage_.GetScalar(id); 160 | } 161 | 162 | 163 | auto VectorDatabase::Search(const rapidjson::Document& json_request) -> std::pair, std::vector> { 164 | // 从 JSON 请求中获取查询参数 165 | std::vector query; 166 | for (const auto& q : json_request[REQUEST_VECTORS].GetArray()) { 167 | query.push_back(q.GetFloat()); 168 | } 169 | int k = json_request[REQUEST_K].GetInt(); 170 | 171 | // 获取请求参数中的索引类型 172 | IndexFactory::IndexType index_type = IndexFactory::IndexType::UNKNOWN; 173 | if (json_request.HasMember(REQUEST_INDEX_TYPE) && json_request[REQUEST_INDEX_TYPE].IsString()) { 174 | std::string index_type_str = json_request[REQUEST_INDEX_TYPE].GetString(); 175 | if (index_type_str == INDEX_TYPE_FLAT) { 176 | index_type = IndexFactory::IndexType::FLAT; 177 | } else if (index_type_str == INDEX_TYPE_HNSW) { 178 | index_type = IndexFactory::IndexType::HNSW; 179 | } 180 | } 181 | 182 | // 检查请求中是否包含 filter 参数 183 | roaring_bitmap_t* 
filter_bitmap = nullptr; 184 | if (json_request.HasMember("filter") && json_request["filter"].IsObject()) { 185 | const auto& filter = json_request["filter"]; 186 | std::string field_name = filter["fieldName"].GetString(); 187 | std::string op_str = filter["op"].GetString(); 188 | int64_t value = filter["value"].GetInt64(); 189 | 190 | FilterIndex::Operation op = (op_str == "=") ? FilterIndex::Operation::EQUAL : FilterIndex::Operation::NOT_EQUAL; 191 | 192 | // 通过 getGlobalIndexFactory 的 getIndex 方法获取 FilterIndex 193 | auto* filter_index = static_cast(IndexFactory::Instance().GetIndex(IndexFactory::IndexType::FILTER)); 194 | 195 | // 调用 FilterIndex 的 getIntFieldFilterBitmap 方法 196 | filter_bitmap = roaring_bitmap_create(); 197 | filter_index->GetIntFieldFilterBitmap(field_name, op, value, filter_bitmap); 198 | } 199 | 200 | // 使用全局 IndexFactory 获取索引对象 201 | void* index = IndexFactory::Instance().GetIndex(index_type); 202 | 203 | // 根据索引类型初始化索引对象并调用 search_vectors 函数 204 | std::pair, std::vector> results; 205 | switch (index_type) { 206 | case IndexFactory::IndexType::FLAT: { 207 | auto* faiss_index = static_cast(index); 208 | results = faiss_index->SearchVectors(query, k, filter_bitmap); // 将 filter_bitmap 传递给 search_vectors 方法 209 | break; 210 | } 211 | case IndexFactory::IndexType::HNSW: { 212 | auto* hnsw_index = static_cast(index); 213 | results = hnsw_index->SearchVectors(query, k, filter_bitmap); // 将 filter_bitmap 传递给 search_vectors 方法 214 | break; 215 | } 216 | // 在此处添加其他索引类型的处理逻辑 217 | default: 218 | break; 219 | } 220 | delete filter_bitmap; 221 | return results; 222 | } 223 | void VectorDatabase::TakeSnapshot() { // 添加 takeSnapshot 方法实现 224 | persistence_.TakeSnapshot(); 225 | } 226 | 227 | auto VectorDatabase::GetStartIndexId() const -> int64_t { 228 | return persistence_.GetId(); // 通过调用 persistence_ 的 GetID 方法获取起始索引 ID 229 | } 230 | 231 | } // namespace vectordb -------------------------------------------------------------------------------- 
/src/httpserver/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( 2 | vectorDB_httpserver 3 | OBJECT 4 | http_server.cpp 5 | proxy_server.cpp 6 | master_server.cpp 7 | master_service_impl.cpp 8 | proxy_service_impl.cpp 9 | user_service_impl.cpp 10 | admin_service_impl.cpp 11 | base_service_impl.cpp) 12 | 13 | set(ALL_OBJECT_FILES 14 | ${ALL_OBJECT_FILES} $ 15 | PARENT_SCOPE) -------------------------------------------------------------------------------- /src/httpserver/admin_service_impl.cpp: -------------------------------------------------------------------------------- 1 | #include "httpserver/admin_service_impl.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "common/constants.h" 10 | #include "index/faiss_index.h" 11 | #include "index/hnswlib_index.h" 12 | #include "index/index_factory.h" 13 | #include "logger/logger.h" 14 | 15 | namespace vectordb { 16 | void AdminServiceImpl::snapshot(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 17 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 18 | global_logger->debug("Received snapshot request"); 19 | brpc::ClosureGuard done_guard(done); 20 | auto *cntl = static_cast(controller); 21 | 22 | vector_database_->TakeSnapshot(); // 调用 VectorDatabase::takeSnapshot 23 | 24 | rapidjson::Document json_response; 25 | json_response.SetObject(); 26 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 27 | 28 | // 设置响应 29 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 30 | SetJsonResponse(json_response, cntl); 31 | } 32 | 33 | void AdminServiceImpl::SetLeader(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 34 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 35 | global_logger->debug("Received setLeader request"); 36 | 
brpc::ClosureGuard done_guard(done); 37 | auto *cntl = static_cast(controller); 38 | 39 | // 将当前节点设置为主节点 40 | raft_stuff_->EnableElectionTimeout(10000, 20000); 41 | 42 | rapidjson::Document json_response; 43 | json_response.SetObject(); 44 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 45 | 46 | // 设置响应 47 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 48 | SetJsonResponse(json_response, cntl); 49 | } 50 | 51 | void AdminServiceImpl::AddFollower(::google::protobuf::RpcController *controller, 52 | const ::nvm::HttpRequest * /*request*/, ::nvm::HttpResponse * /*response*/, 53 | ::google::protobuf::Closure *done) { 54 | global_logger->debug("Received addFollower request"); 55 | 56 | brpc::ClosureGuard done_guard(done); 57 | auto *cntl = static_cast(controller); 58 | // 解析JSON请求 59 | rapidjson::Document json_request; 60 | json_request.Parse(cntl->request_attachment().to_string().c_str()); 61 | 62 | // 检查JSON文档是否为有效对象 63 | if (!json_request.IsObject()) { 64 | global_logger->error("Invalid JSON request"); 65 | cntl->http_response().set_status_code(400); 66 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid JSON request"); 67 | done->Run(); 68 | return; 69 | } 70 | 71 | // 检查当前节点是否为leader 72 | if (!raft_stuff_->IsLeader()) { 73 | global_logger->error("Current node is not the leader"); 74 | cntl->http_response().set_status_code(400); 75 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid JSON request"); 76 | done->Run(); 77 | return; 78 | } 79 | 80 | // 从JSON请求中获取follower节点信息 81 | int node_id = json_request["nodeId"].GetInt(); 82 | std::string endpoint = json_request["endpoint"].GetString(); 83 | 84 | // 调用 RaftStuff 的 addSrv 方法将新的follower节点添加到集群中 85 | bool success = raft_stuff_->AddSrv(node_id, endpoint); 86 | 87 | if (!success) { 88 | global_logger->error("raft_stuff AddSrv failed"); 89 | cntl->http_response().set_status_code(400); 90 | SetErrorJsonResponse(cntl, 
RESPONSE_RETCODE_ERROR, "raft_stuff AddSrv failed"); 91 | done->Run(); 92 | return; 93 | } 94 | 95 | rapidjson::Document json_response; 96 | json_response.SetObject(); 97 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 98 | 99 | // 设置响应 100 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 101 | SetJsonResponse(json_response, cntl); 102 | } 103 | 104 | void AdminServiceImpl::ListNode(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 105 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 106 | global_logger->debug("Received listNode request"); 107 | brpc::ClosureGuard done_guard(done); 108 | auto *cntl = static_cast(controller); 109 | 110 | // 获取所有节点信息 111 | auto nodes_info = raft_stuff_->GetAllNodesInfo(); 112 | 113 | rapidjson::Document json_response; 114 | json_response.SetObject(); 115 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 116 | 117 | // 将节点信息添加到JSON响应中 118 | rapidjson::Value nodes_array(rapidjson::kArrayType); 119 | for (const auto &node_info : nodes_info) { 120 | rapidjson::Value node_object(rapidjson::kObjectType); 121 | node_object.AddMember("nodeId", std::get<0>(node_info), allocator); 122 | node_object.AddMember("endpoint", rapidjson::Value(std::get<1>(node_info).c_str(), allocator), allocator); 123 | node_object.AddMember("state", rapidjson::Value(std::get<2>(node_info).c_str(), allocator), 124 | allocator); // 添加节点状态 125 | node_object.AddMember("last_log_idx", std::get<3>(node_info), allocator); // 添加节点最后日志索引 126 | node_object.AddMember("last_succ_resp_us", std::get<4>(node_info), allocator); // 添加节点最后成功响应时间 127 | nodes_array.PushBack(node_object, allocator); 128 | } 129 | json_response.AddMember("nodes", nodes_array, allocator); 130 | 131 | // 设置响应 132 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 133 | SetJsonResponse(json_response, cntl); 134 | } 135 | 
136 | void AdminServiceImpl::GetNode(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 137 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 138 | global_logger->debug("Received getNode request"); 139 | brpc::ClosureGuard done_guard(done); 140 | auto *cntl = static_cast(controller); 141 | 142 | // 获取所有节点信息 143 | std::tuple node_info = 144 | raft_stuff_->GetCurrentNodesInfo(); 145 | 146 | rapidjson::Document json_response; 147 | json_response.SetObject(); 148 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 149 | 150 | // 将节点信息添加到JSON响应中 151 | rapidjson::Value nodes_array(rapidjson::kArrayType); 152 | rapidjson::Value node_object(rapidjson::kObjectType); 153 | node_object.AddMember("nodeId", std::get<0>(node_info), allocator); 154 | node_object.AddMember("endpoint", rapidjson::Value(std::get<1>(node_info).c_str(), allocator), allocator); 155 | node_object.AddMember("state", rapidjson::Value(std::get<2>(node_info).c_str(), allocator), 156 | allocator); // 添加节点状态 157 | node_object.AddMember("last_log_idx", std::get<3>(node_info), allocator); // 添加节点最后日志索引 158 | node_object.AddMember("last_succ_resp_us", std::get<4>(node_info), allocator); // 添加节点最后成功响应时间 159 | 160 | json_response.AddMember("node", node_object, allocator); 161 | 162 | // 设置响应 163 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 164 | SetJsonResponse(json_response, cntl); 165 | } 166 | 167 | } // namespace vectordb -------------------------------------------------------------------------------- /src/httpserver/base_service_impl.cpp: -------------------------------------------------------------------------------- 1 | #include "httpserver/base_service_impl.h" 2 | #include 3 | 4 | namespace vectordb { 5 | void BaseServiceImpl::SetJsonResponse(const rapidjson::Document &json_response, brpc::Controller *cntl) { 6 | rapidjson::StringBuffer buffer; 7 | rapidjson::Writer writer(buffer); 
8 | json_response.Accept(writer); 9 | cntl->response_attachment().append(buffer.GetString()); 10 | cntl->http_response().set_content_type(RESPONSE_CONTENT_TYPE_JSON); 11 | } 12 | 13 | void BaseServiceImpl::SetTextResponse(const std::string &response, brpc::Controller *cntl) { 14 | cntl->response_attachment().append(response); 15 | cntl->http_response().set_content_type(RESPONSE_CONTENT_TYPE_TEXT); 16 | } 17 | 18 | void BaseServiceImpl::SetJsonResponse(const std::string &response, brpc::Controller *cntl) { 19 | cntl->response_attachment().append(response); 20 | cntl->http_response().set_content_type(RESPONSE_CONTENT_TYPE_TEXT); 21 | } 22 | 23 | void BaseServiceImpl::SetJsonResponse(const std::string &response, brpc::Controller *cntl,int status_code) { 24 | cntl->response_attachment().append(response); 25 | cntl->http_response().set_status_code(status_code); 26 | cntl->http_response().set_content_type(RESPONSE_CONTENT_TYPE_TEXT); 27 | } 28 | 29 | void BaseServiceImpl::SetTextResponse(const std::string &response, brpc::Controller *cntl,int status_code) { 30 | cntl->response_attachment().append(response); 31 | cntl->http_response().set_status_code(status_code); 32 | cntl->http_response().set_content_type(RESPONSE_CONTENT_TYPE_TEXT); 33 | } 34 | 35 | void BaseServiceImpl::SetErrorJsonResponse(brpc::Controller *cntl, int error_code, const std::string &errorMsg) { 36 | rapidjson::Document json_response; 37 | json_response.SetObject(); 38 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 39 | json_response.AddMember(RESPONSE_RETCODE, error_code, allocator); 40 | json_response.AddMember(RESPONSE_ERROR_MSG, rapidjson::StringRef(errorMsg.c_str()), allocator); // 使用宏定义 41 | SetJsonResponse(json_response, cntl); 42 | } 43 | 44 | auto BaseServiceImpl::IsRequestValid(const rapidjson::Document &json_request, CheckType check_type) -> bool { 45 | switch (check_type) { 46 | case CheckType::SEARCH: 47 | return json_request.HasMember(REQUEST_VECTORS) && 
json_request.HasMember(REQUEST_K) && 48 | (!json_request.HasMember(REQUEST_INDEX_TYPE) || json_request[REQUEST_INDEX_TYPE].IsString()); 49 | case CheckType::INSERT: 50 | case CheckType::UPSERT: 51 | return json_request.HasMember(REQUEST_VECTORS) && json_request.HasMember(REQUEST_ID) && 52 | (!json_request.HasMember(REQUEST_INDEX_TYPE) || json_request[REQUEST_INDEX_TYPE].IsString()); 53 | default: 54 | return false; 55 | } 56 | } 57 | 58 | void BaseServiceImpl::SetResponse(brpc::Controller *cntl, int retCode, const std::string &msg, 59 | const rapidjson::Document *data) { 60 | rapidjson::Document doc; 61 | doc.SetObject(); 62 | rapidjson::Document::AllocatorType &allocator = doc.GetAllocator(); 63 | 64 | doc.AddMember("retCode", retCode, allocator); 65 | doc.AddMember("msg", rapidjson::Value(msg.c_str(), allocator), allocator); 66 | 67 | if (data != nullptr && data->IsObject()) { 68 | rapidjson::Value data_value(rapidjson::kObjectType); 69 | data_value.CopyFrom(*data, allocator); // 正确地复制 data 70 | doc.AddMember("data", data_value, allocator); 71 | } 72 | SetJsonResponse(doc, cntl); 73 | } 74 | 75 | auto BaseServiceImpl::GetIndexTypeFromRequest(const rapidjson::Document &json_request) 76 | -> vectordb::IndexFactory::IndexType { 77 | // 获取请求参数中的索引类型 78 | if (json_request.HasMember(REQUEST_INDEX_TYPE)) { 79 | std::string index_type_str = json_request[REQUEST_INDEX_TYPE].GetString(); 80 | if (index_type_str == "FLAT") { 81 | return vectordb::IndexFactory::IndexType::FLAT; 82 | } 83 | if (index_type_str == "HNSW") { 84 | return vectordb::IndexFactory::IndexType::HNSW; 85 | } 86 | } 87 | return vectordb::IndexFactory::IndexType::UNKNOWN; 88 | } 89 | 90 | auto BaseServiceImpl::WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) -> size_t { 91 | (static_cast(userp))->append(static_cast(contents), size * nmemb); 92 | return size * nmemb; 93 | } 94 | } // namespace vectordb 95 | 
-------------------------------------------------------------------------------- /src/httpserver/http_server.cpp: -------------------------------------------------------------------------------- 1 | #include "httpserver/http_server.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "common/constants.h" 9 | #include "index/faiss_index.h" 10 | #include "index/hnswlib_index.h" 11 | #include "index/index_factory.h" 12 | #include "logger/logger.h" 13 | 14 | namespace vectordb { 15 | auto HttpServer::Init(VectorDatabase *vector_database,RaftStuff *raft_stuff) -> bool{ 16 | vector_database_ = vector_database; 17 | raft_stuff_ = raft_stuff; 18 | user_service_impl_ = std::make_unique(vector_database_,raft_stuff); 19 | admin_service_impl_ = std::make_unique(vector_database_,raft_stuff); 20 | if (AddService(user_service_impl_.get(), brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { 21 | global_logger->error("Failed to add http_service_impl"); 22 | return false; 23 | } 24 | if (AddService(admin_service_impl_.get(), brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { 25 | global_logger->error("Failed to add admin_service_impl"); 26 | return false; 27 | } 28 | global_logger->info("HttpServer init success"); 29 | return true; 30 | } 31 | } // namespace vectordb -------------------------------------------------------------------------------- /src/httpserver/master_server.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "httpserver/master_server.h" 4 | #include 5 | #include 6 | #include "httpserver/master_service_impl.h" 7 | #include "logger/logger.h" 8 | #include "rapidjson/document.h" 9 | #include "rapidjson/stringbuffer.h" 10 | #include "rapidjson/writer.h" 11 | 12 | namespace vectordb { 13 | auto MasterServer::Init(const std::string &etcdEndpoints) -> bool { 14 | master_service_impl_ = std::make_unique(etcdEndpoints); 15 | 16 | if (AddService(master_service_impl_.get(), 
brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { 17 | global_logger->error("Failed to add master_service_impl_"); 18 | return false; 19 | } 20 | running_ = true; 21 | StartNodeUpdateTimer(); 22 | global_logger->info("MasterServer init success"); 23 | return true; 24 | } 25 | 26 | MasterServer::~MasterServer() { 27 | running_ = false; // 停止定时器循环 28 | } 29 | 30 | void MasterServer::StartNodeUpdateTimer() { 31 | std::thread([this]() { 32 | while (running_) { // 这里可能需要一种更优雅的退出机制 33 | std::this_thread::sleep_for(std::chrono::seconds(10)); // 每10秒更新一次 34 | if (master_service_impl_ == nullptr) { 35 | global_logger->error("master_service_impl_ empty"); 36 | } else { 37 | master_service_impl_->UpdateNodeStates(); 38 | } 39 | } 40 | }).detach(); 41 | } 42 | 43 | } // namespace vectordb 44 | -------------------------------------------------------------------------------- /src/httpserver/proxy_server.cpp: -------------------------------------------------------------------------------- 1 | #include "httpserver/proxy_server.h" 2 | #include 3 | #include 4 | #include "httpserver/proxy_service_impl.h" 5 | #include "logger/logger.h" // 包含 rapidjson 头文件 6 | #include "rapidjson/document.h" 7 | #include "rapidjson/stringbuffer.h" 8 | #include "rapidjson/writer.h" 9 | namespace vectordb { 10 | 11 | auto ProxyServer::Init(const std::string &masterServerHost, int masterServerPort, int instanceId, 12 | const std::set &read_path, const std::set &write_paths) -> bool { 13 | master_server_host_ = masterServerHost; 14 | master_server_port_ = masterServerPort; 15 | instance_id_ = instanceId; 16 | read_paths_ = read_path; 17 | write_paths_ = write_paths; 18 | InitCurl(); 19 | proxy_service_impl_ = std::make_unique(read_path, write_paths, masterServerHost, masterServerPort, 20 | instanceId, curl_handle_); 21 | if (masterServerHost.empty()) { 22 | global_logger->info("zhouzj print test ProxyServer::Init() after ProxyServiceImpl, masterServerHost empty"); 23 | } else { 24 | global_logger->info("zhouzj 
print test ProxyServer::Init() after ProxyServiceImpl, masterServerHost not empty"); 25 | } 26 | if (AddService(proxy_service_impl_.get(), brpc::SERVER_DOESNT_OWN_SERVICE) != 0) { 27 | global_logger->error("Failed to add http_service_impl"); 28 | return false; 29 | } 30 | running_ = true; 31 | FetchAndUpdateNodes(); 32 | FetchAndUpdatePartitionConfig(); 33 | 34 | StartNodeUpdateTimer(); // 启动节点更新定时器 35 | StartPartitionUpdateTimer(); // 启动分区配置更新定时器 36 | 37 | global_logger->info("ProxyServer init success"); 38 | return true; 39 | } 40 | 41 | ProxyServer::~ProxyServer() { 42 | running_ = false; // 停止定时器循环 43 | CleanupCurl(); 44 | } 45 | 46 | void ProxyServer::InitCurl() { 47 | curl_global_init(CURL_GLOBAL_DEFAULT); 48 | curl_handle_ = curl_easy_init(); 49 | if (curl_handle_ != nullptr) { 50 | curl_easy_setopt(curl_handle_, CURLOPT_TCP_KEEPALIVE, 1L); 51 | curl_easy_setopt(curl_handle_, CURLOPT_TCP_KEEPIDLE, 120L); 52 | curl_easy_setopt(curl_handle_, CURLOPT_TCP_KEEPINTVL, 60L); 53 | } 54 | } 55 | 56 | void ProxyServer::CleanupCurl() { 57 | if (curl_handle_ != nullptr) { 58 | curl_easy_cleanup(curl_handle_); 59 | } 60 | curl_global_cleanup(); 61 | } 62 | 63 | void ProxyServer::FetchAndUpdateNodes() { 64 | if (proxy_service_impl_ == nullptr) { 65 | global_logger->error("proxy_service_impl_ empty"); 66 | } else { 67 | proxy_service_impl_->FetchAndUpdateNodes(); 68 | } 69 | } 70 | 71 | void ProxyServer::FetchAndUpdatePartitionConfig() { 72 | if (proxy_service_impl_ == nullptr) { 73 | global_logger->error("proxy_service_impl_ empty"); 74 | } else { 75 | proxy_service_impl_->FetchAndUpdatePartitionConfig(); 76 | } 77 | } 78 | 79 | void ProxyServer::StartNodeUpdateTimer() { 80 | std::thread([this]() { 81 | while (running_) { 82 | std::this_thread::sleep_for(std::chrono::seconds(30)); 83 | if (proxy_service_impl_ == nullptr) { 84 | global_logger->error("proxy_service_impl_ empty"); 85 | } else { 86 | proxy_service_impl_->FetchAndUpdateNodes(); 87 | } 88 | } 89 | }).detach(); 
90 | } 91 | 92 | void ProxyServer::StartPartitionUpdateTimer() { 93 | std::thread([this]() { 94 | while (running_) { 95 | std::this_thread::sleep_for(std::chrono::minutes(1)); // 假设每5分钟更新一次,可以根据需要调整 96 | if (proxy_service_impl_ == nullptr) { 97 | global_logger->error("proxy_service_impl_ empty"); 98 | } else { 99 | proxy_service_impl_->FetchAndUpdatePartitionConfig(); 100 | } 101 | } 102 | }).detach(); 103 | } 104 | 105 | } // namespace vectordb -------------------------------------------------------------------------------- /src/httpserver/user_service_impl.cpp: -------------------------------------------------------------------------------- 1 | #include "httpserver/user_service_impl.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "common/constants.h" 10 | #include "index/faiss_index.h" 11 | #include "index/hnswlib_index.h" 12 | #include "index/index_factory.h" 13 | #include "logger/logger.h" 14 | namespace vectordb { 15 | void UserServiceImpl::search(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 16 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 17 | global_logger->debug("Received search request"); 18 | 19 | brpc::ClosureGuard done_guard(done); 20 | auto *cntl = static_cast(controller); 21 | 22 | // 解析JSON请求 23 | rapidjson::Document json_request; 24 | json_request.Parse(cntl->request_attachment().to_string().c_str()); 25 | 26 | // 打印用户的输入参数 27 | global_logger->info("Search request parameters: {}", cntl->request_attachment().to_string()); 28 | 29 | // 检查JSON文档是否为有效对象 30 | if (!json_request.IsObject()) { 31 | global_logger->error("Invalid JSON request"); 32 | cntl->http_response().set_status_code(400); 33 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid JSON request"); 34 | done->Run(); 35 | return; 36 | } 37 | 38 | // 检查请求的合法性 39 | if (!IsRequestValid(json_request, BaseServiceImpl::CheckType::SEARCH)) { 40 | 
global_logger->error("Missing vectors or k parameter in the request"); 41 | cntl->http_response().set_status_code(400); 42 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Missing vectors or k parameter in the request"); 43 | done->Run(); 44 | return; 45 | } 46 | 47 | // 获取查询参数 48 | std::vector query; 49 | for (const auto &q : json_request[REQUEST_VECTORS].GetArray()) { 50 | query.push_back(q.GetFloat()); 51 | } 52 | int k = json_request[REQUEST_K].GetInt(); 53 | 54 | global_logger->debug("Query parameters: k = {}", k); 55 | 56 | // 获取请求参数中的索引类型 57 | IndexFactory::IndexType index_type = GetIndexTypeFromRequest(json_request); 58 | 59 | // 如果索引类型为UNKNOWN,返回400错误 60 | if (index_type == IndexFactory::IndexType::UNKNOWN) { 61 | global_logger->error("Invalid indexType parameter in the request"); 62 | cntl->http_response().set_status_code(400); 63 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid indexType parameter in the request"); 64 | done->Run(); 65 | return; 66 | } 67 | 68 | // 使用 VectorDatabase 的 search 接口执行查询 69 | std::pair, std::vector> results = vector_database_->Search(json_request); 70 | 71 | // 将结果转换为JSON 72 | rapidjson::Document json_response; 73 | json_response.SetObject(); 74 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 75 | 76 | // 检查是否有有效的搜索结果 77 | bool valid_results = false; 78 | rapidjson::Value vectors(rapidjson::kArrayType); 79 | rapidjson::Value distances(rapidjson::kArrayType); 80 | for (size_t i = 0; i < results.first.size(); ++i) { 81 | if (results.first[i] != -1) { 82 | valid_results = true; 83 | vectors.PushBack(results.first[i], allocator); 84 | distances.PushBack(results.second[i], allocator); 85 | } 86 | } 87 | 88 | if (valid_results) { 89 | json_response.AddMember(RESPONSE_VECTORS, vectors, allocator); 90 | json_response.AddMember(RESPONSE_DISTANCES, distances, allocator); 91 | } 92 | 93 | // 设置响应 94 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 95 | 
SetJsonResponse(json_response, cntl); 96 | } 97 | 98 | void UserServiceImpl::insert(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 99 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 100 | global_logger->debug("Received insert request"); 101 | brpc::ClosureGuard done_guard(done); 102 | auto *cntl = static_cast(controller); 103 | // 解析JSON请求 104 | rapidjson::Document json_request; 105 | json_request.Parse(cntl->request_attachment().to_string().c_str()); 106 | 107 | // 打印用户的输入参数 108 | global_logger->info("Insert request parameters: {}", cntl->request_attachment().to_string()); 109 | 110 | // 检查JSON文档是否为有效对象 111 | if (!json_request.IsObject()) { 112 | global_logger->error("Invalid JSON request"); 113 | cntl->http_response().set_status_code(400); 114 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid JSON request"); 115 | return; 116 | } 117 | 118 | // 检查请求的合法性 119 | if (!IsRequestValid(json_request, BaseServiceImpl::CheckType::INSERT)) { // 添加对isRequestValid的调用 120 | global_logger->error("Missing vectors or id parameter in the request"); 121 | cntl->http_response().set_status_code(400); 122 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Missing vectors or k parameter in the request"); 123 | return; 124 | } 125 | 126 | // 获取插入参数 127 | std::vector data; 128 | for (const auto &d : json_request[REQUEST_VECTORS].GetArray()) { 129 | data.push_back(d.GetFloat()); 130 | } 131 | uint64_t label = json_request[REQUEST_ID].GetUint64(); // 使用宏定义 132 | 133 | global_logger->debug("Insert parameters: label = {}", label); 134 | 135 | // 获取请求参数中的索引类型 136 | IndexFactory::IndexType index_type = GetIndexTypeFromRequest(json_request); 137 | 138 | // 如果索引类型为UNKNOWN,返回400错误 139 | if (index_type == IndexFactory::IndexType::UNKNOWN) { 140 | global_logger->error("Invalid indexType parameter in the request"); 141 | cntl->http_response().set_status_code(400); 142 | SetErrorJsonResponse(cntl, 
RESPONSE_RETCODE_ERROR, "Invalid indexType parameter in the request"); 143 | return; 144 | } 145 | 146 | // 使用全局IndexFactory获取索引对象 147 | void *index = IndexFactory::Instance().GetIndex(index_type); 148 | assert(index != nullptr); 149 | 150 | // 根据索引类型初始化索引对象并调用insert_vectors函数 151 | switch (index_type) { 152 | case IndexFactory::IndexType::FLAT: { 153 | auto *faiss_index = static_cast(index); 154 | faiss_index->InsertVectors(data, label); 155 | break; 156 | } 157 | case IndexFactory::IndexType::HNSW: { 158 | auto *hnsw_index = static_cast(index); 159 | hnsw_index->InsertVectors(data, label); 160 | break; 161 | } 162 | // 在此处添加其他索引类型的处理逻辑 163 | default: 164 | break; 165 | } 166 | 167 | // 设置响应 168 | rapidjson::Document json_response; 169 | json_response.SetObject(); 170 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 171 | 172 | // 添加retCode到响应 173 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 174 | 175 | SetJsonResponse(json_response, cntl); 176 | } 177 | 178 | void UserServiceImpl::upsert(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 179 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 180 | global_logger->debug("Received upsert request"); 181 | brpc::ClosureGuard done_guard(done); 182 | auto *cntl = static_cast(controller); 183 | 184 | // 解析JSON请求 185 | rapidjson::Document json_request; 186 | json_request.Parse(cntl->request_attachment().to_string().c_str()); 187 | 188 | global_logger->info("Upsert request parameters: {}", cntl->request_attachment().to_string()); 189 | 190 | // 检查JSON文档是否为有效对象 191 | if (!json_request.IsObject()) { 192 | global_logger->error("Invalid JSON request"); 193 | cntl->http_response().set_status_code(400); 194 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid JSON request"); 195 | return; 196 | } 197 | 198 | // 检查请求的合法性 199 | if (!IsRequestValid(json_request, 
BaseServiceImpl::CheckType::UPSERT)) { 200 | global_logger->error("Missing vectors or id parameter in the request"); 201 | cntl->http_response().set_status_code(400); 202 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Missing vectors or id parameter in the request"); 203 | return; 204 | } 205 | 206 | // uint64_t label = json_request[REQUEST_ID].GetUint64(); 207 | 208 | // // 获取请求参数中的索引类型 209 | // IndexFactory::IndexType index_type = GetIndexTypeFromRequest(json_request); 210 | 211 | // 调用 RaftStuff 的 appendEntries 方法将新的日志条目添加到集群中 212 | raft_stuff_->AppendEntries(cntl->request_attachment().to_string()); 213 | 214 | // vector_database_->Upsert(label, json_request, index_type); 215 | // // 在 upsert 调用之后调用 VectorDatabase::writeWALLog 216 | // vector_database_->WriteWalLog("upsert", json_request); 217 | 218 | rapidjson::Document json_response; 219 | json_response.SetObject(); 220 | rapidjson::Document::AllocatorType &response_allocator = json_response.GetAllocator(); 221 | 222 | // 添加retCode到响应 223 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, response_allocator); 224 | 225 | SetJsonResponse(json_response, cntl); 226 | } 227 | 228 | void UserServiceImpl::query(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 229 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) { 230 | global_logger->debug("Received query request"); 231 | 232 | brpc::ClosureGuard done_guard(done); 233 | auto *cntl = static_cast(controller); 234 | 235 | // 解析JSON请求 236 | rapidjson::Document json_request; 237 | json_request.Parse(cntl->request_attachment().to_string().c_str()); 238 | 239 | // 检查JSON文档是否为有效对象 240 | if (!json_request.IsObject()) { 241 | global_logger->error("Invalid JSON request"); 242 | cntl->http_response().set_status_code(400); 243 | SetErrorJsonResponse(cntl, RESPONSE_RETCODE_ERROR, "Invalid JSON request"); 244 | return; 245 | } 246 | 247 | // 从JSON请求中获取ID 248 | uint64_t id = 
json_request[REQUEST_ID].GetUint64(); // 使用宏REQUEST_ID 249 | 250 | // 查询JSON数据 251 | rapidjson::Document json_data = vector_database_->Query(id); 252 | 253 | // 将结果转换为JSON 254 | rapidjson::Document json_response; 255 | json_response.SetObject(); 256 | rapidjson::Document::AllocatorType &allocator = json_response.GetAllocator(); 257 | 258 | // 如果查询到向量,则将json_data对象的内容合并到json_response对象中 259 | if (!json_data.IsNull()) { 260 | for (auto it = json_data.MemberBegin(); it != json_data.MemberEnd(); ++it) { 261 | json_response.AddMember(it->name, it->value, allocator); 262 | } 263 | } 264 | 265 | // 设置响应 266 | json_response.AddMember(RESPONSE_RETCODE, RESPONSE_RETCODE_SUCCESS, allocator); 267 | SetJsonResponse(json_response, cntl); 268 | } 269 | 270 | } // namespace vectordb -------------------------------------------------------------------------------- /src/include/cluster/in_memory_log_store.h: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | Copyright 2017-2019 eBay Inc. 3 | Author/Developer(s): Jung-Sang Ahn 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | **************************************************************************/ 17 | 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | #include "libnuraft/event_awaiter.h" 24 | #include "libnuraft/internal_timer.hxx" 25 | #include "libnuraft/log_store.hxx" 26 | #include "libnuraft/pp_util.hxx" 27 | #include "database/vector_database.h" // 包含 VectorDatabase 类的头文件 28 | #include "libnuraft/raft_server.hxx" 29 | namespace vectordb { 30 | 31 | // class raft_server; 32 | 33 | class InmemLogStore : public nuraft::log_store { 34 | public: 35 | explicit InmemLogStore(VectorDatabase* vector_database); 36 | 37 | ~InmemLogStore() override; 38 | 39 | // NOLINTNEXTLINE 40 | __nocopy__(InmemLogStore); 41 | 42 | public: 43 | auto next_slot() const -> ulong override; 44 | 45 | auto start_index() const -> ulong override; 46 | 47 | auto last_entry() const -> nuraft::ptr override; 48 | 49 | auto append(nuraft::ptr &entry) -> ulong override; 50 | 51 | void write_at(ulong index, nuraft::ptr &entry) override; 52 | 53 | auto log_entries(ulong start, ulong end) -> nuraft::ptr>> override; 54 | 55 | auto log_entries_ext(nuraft::ulong start, nuraft::ulong end, nuraft::int64 batch_size_hint_in_bytes = 0) 56 | -> nuraft::ptr>> override; 57 | 58 | auto entry_at(ulong index) -> nuraft::ptr override; 59 | 60 | auto term_at(ulong index) -> ulong override; 61 | 62 | auto pack(ulong index, nuraft::int32 cnt) -> nuraft::ptr override; 63 | 64 | void apply_pack(ulong index, nuraft::buffer &pack) override; 65 | 66 | auto compact(ulong last_log_index) -> bool override; 67 | 68 | auto flush() -> bool override; 69 | 70 | void Close(); 71 | 72 | auto last_durable_index() -> ulong override; 73 | 74 | void SetDiskDelay(nuraft::raft_server *raft, size_t delay_ms); 75 | 76 | private: 77 | static auto MakeClone(const nuraft::ptr &entry) -> nuraft::ptr; 78 | 79 | void DiskEmulLoop(); 80 | 81 | /** 82 | * Map of . 83 | */ 84 | std::map> logs_; 85 | 86 | /** 87 | * Lock for `logs_`. 
88 | */ 89 | mutable std::mutex logs_lock_; 90 | 91 | /** 92 | * The index of the first log. 93 | */ 94 | std::atomic start_idx_; 95 | 96 | /** 97 | * Backward pointer to Raft server. 98 | */ 99 | nuraft::raft_server *raft_server_bwd_pointer_; 100 | 101 | // Testing purpose --------------- BEGIN 102 | 103 | /** 104 | * If non-zero, this log store will emulate the disk write delay. 105 | */ 106 | std::atomic disk_emul_delay_; 107 | 108 | /** 109 | * Map of , emulating logs that is being written to disk. 110 | * Log index will be regarded as "durable" after the corresponding timestamp. 111 | */ 112 | std::map disk_emul_logs_being_written_; 113 | 114 | /** 115 | * Thread that will update `last_durable_index_` and call 116 | * `notify_log_append_completion` at proper time. 117 | */ 118 | std::unique_ptr disk_emul_thread_; 119 | 120 | /** 121 | * Flag to terminate the thread. 122 | */ 123 | std::atomic disk_emul_thread_stop_signal_; 124 | 125 | /** 126 | * Event awaiter that emulates disk delay. 127 | */ 128 | EventAwaiter disk_emul_ea_; 129 | 130 | /** 131 | * Last written log index. 132 | */ 133 | std::atomic disk_emul_last_durable_index_; 134 | 135 | VectorDatabase* vector_database_; // 添加一个 VectorDatabase 指针成员变量 136 | // Testing purpose --------------- END 137 | }; 138 | 139 | } // namespace vectordb 140 | -------------------------------------------------------------------------------- /src/include/cluster/in_memory_state_mgr.h: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | Copyright 2017-2019 eBay Inc. 3 | Author/Developer(s): Jung-Sang Ahn 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | **************************************************************************/ 17 | 18 | #pragma once 19 | 20 | #include "cluster/in_memory_log_store.h" 21 | #include 22 | #include "libnuraft/nuraft.hxx" 23 | #include "database/vector_database.h" 24 | namespace vectordb { 25 | 26 | class InmemStateMgr : public nuraft::state_mgr { 27 | public: 28 | InmemStateMgr(int srv_id, const std::string &endpoint, VectorDatabase* vector_database) 29 | : my_id_(srv_id), my_endpoint_(endpoint), cur_log_store_(nuraft::cs_new(vector_database)) { 30 | my_srv_config_ = nuraft::cs_new(srv_id, endpoint); 31 | 32 | // Initial cluster config: contains only one server (myself). 33 | saved_config_ = nuraft::cs_new(); 34 | saved_config_->get_servers().push_back(my_srv_config_); 35 | } 36 | 37 | ~InmemStateMgr() override = default; 38 | 39 | auto load_config() -> nuraft::ptr override { 40 | // Just return in-memory data in this example. 41 | // May require reading from disk here, if it has been written to disk. 42 | return saved_config_; 43 | } 44 | 45 | void save_config(const nuraft::cluster_config &config) override { 46 | // Just keep in memory in this example. 47 | // Need to write to disk here, if want to make it durable. 48 | nuraft::ptr buf = config.serialize(); 49 | saved_config_ = nuraft::cluster_config::deserialize(*buf); 50 | } 51 | 52 | void save_state(const nuraft::srv_state &state) override { 53 | // Just keep in memory in this example. 54 | // Need to write to disk here, if want to make it durable. 
55 | nuraft::ptr buf = state.serialize(); 56 | saved_state_ = nuraft::srv_state::deserialize(*buf); 57 | } 58 | 59 | auto read_state() -> nuraft::ptr override { 60 | // Just return in-memory data in this example. 61 | // May require reading from disk here, if it has been written to disk. 62 | return saved_state_; 63 | } 64 | 65 | auto load_log_store() -> nuraft::ptr override { 66 | return std::static_pointer_cast(cur_log_store_); 67 | } 68 | 69 | auto server_id() -> nuraft::int32 override { return my_id_; } 70 | 71 | void system_exit(const int exit_code) override {} 72 | 73 | auto GetSrvConfig() const -> nuraft::ptr { return my_srv_config_; } 74 | 75 | private: 76 | int my_id_; 77 | std::string my_endpoint_; 78 | nuraft::ptr cur_log_store_; 79 | nuraft::ptr my_srv_config_; 80 | nuraft::ptr saved_config_; 81 | nuraft::ptr saved_state_; 82 | }; 83 | 84 | } // namespace vectordb 85 | -------------------------------------------------------------------------------- /src/include/cluster/log_state_machine.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "database/vector_database.h" 5 | namespace vectordb { 6 | class LogStateMachine : public nuraft::state_machine { 7 | public: 8 | LogStateMachine() : last_committed_idx_(0) {} 9 | ~LogStateMachine() override = default; 10 | void SetVectorDatabase(VectorDatabase *vector_database); // Declared here, defined out of line 11 | auto commit(nuraft::ulong log_idx, nuraft::buffer &data) -> nuraft::ptr override; 12 | 13 | auto pre_commit(nuraft::ulong log_idx, nuraft::buffer &data) -> nuraft::ptr override; 14 | void commit_config(const nuraft::ulong log_idx, nuraft::ptr &new_conf) override { 15 | // Nothing to do with configuration change. Just update committed index. 
16 | last_committed_idx_ = log_idx; 17 | } 18 | void rollback(const nuraft::ulong log_idx, nuraft::buffer &data) override {} 19 | auto read_logical_snp_obj(nuraft::snapshot &s, void *&user_snp_ctx, nuraft::ulong obj_id, 20 | nuraft::ptr &data_out, bool &is_last_obj) -> int override { 21 | return 0; 22 | } 23 | void save_logical_snp_obj(nuraft::snapshot &s, nuraft::ulong &obj_id, nuraft::buffer &data, bool is_first_obj, 24 | bool is_last_obj) override {} 25 | auto apply_snapshot(nuraft::snapshot &s) -> bool override { return true; } 26 | void free_user_snp_ctx(void *&user_snp_ctx) override {} 27 | auto last_snapshot() -> nuraft::ptr override { return nullptr; } 28 | auto last_commit_index() -> nuraft::ulong override { return last_committed_idx_; } 29 | 30 | void create_snapshot(nuraft::snapshot &s, nuraft::async_result::handler_type &when_done) override {} 31 | 32 | private: 33 | // Last committed Raft log number. 34 | std::atomic last_committed_idx_; 35 | VectorDatabase *vector_database_{nullptr}; // Not deleted by this class; nullptr until SetVectorDatabase() is called (the constructor does not set it) 36 | }; 37 | 38 | } // namespace vectordb -------------------------------------------------------------------------------- /src/include/cluster/raft_logger_wrapper.h: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | Copyright 2017-2019 eBay Inc. 3 | Author/Developer(s): Jung-Sang Ahn 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | https://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | **************************************************************************/ 17 | 18 | #pragma once 19 | 20 | #include 21 | #include "cluster/raft_logger.h" 22 | 23 | namespace vectordb { 24 | 25 | /** 26 | * Example implementation of Raft logger, on top of SimpleLogger. 27 | */ 28 | class LoggerWrapper : public nuraft::logger { 29 | public: 30 | explicit LoggerWrapper(const std::string &log_file, int log_level = 6) { 31 | my_log_ = new SimpleLogger(log_file, 1024, 32 * 1024 * 1024, 10); 32 | my_log_->SetLogLevel(log_level); 33 | my_log_->SetDispLevel(-1); 34 | vectordb::SimpleLogger::SetCrashDumpPath("./", true); 35 | my_log_->Start(); 36 | } 37 | 38 | ~LoggerWrapper() override { Destroy(); } 39 | 40 | void Destroy() { 41 | if (my_log_ != nullptr) { 42 | my_log_->FlushAll(); 43 | my_log_->Stop(); 44 | delete my_log_; 45 | my_log_ = nullptr; 46 | } 47 | } 48 | 49 | void put_details(int level, const char *source_file, const char *func_name, size_t line_number, 50 | const std::string &msg) override { 51 | if (my_log_ != nullptr) { 52 | my_log_->Put(level, source_file, func_name, line_number, "%s", msg.c_str()); 53 | } 54 | } 55 | 56 | void set_level(int l) override { 57 | if (my_log_ == nullptr) { 58 | return; 59 | } 60 | 61 | if (l < 0) { 62 | l = 1; 63 | } 64 | if (l > 6) { 65 | l = 6; 66 | } 67 | my_log_->SetLogLevel(l); 68 | } 69 | 70 | auto get_level() -> int override { 71 | if (my_log_ == nullptr) { 72 | return 0; 73 | } 74 | return my_log_->GetLogLevel(); 75 | } 76 | 77 | auto GetLogger() const -> SimpleLogger * { return my_log_; } 78 | 79 | private: 80 | SimpleLogger *my_log_; 81 | }; 82 | 83 | } // namespace vectordb -------------------------------------------------------------------------------- /src/include/cluster/raft_stuff.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 
#include "cluster/in_memory_state_mgr.h" 5 | #include "log_state_machine.h" 6 | #include "logger/logger.h" // Pulls in logger.h so the logger can be used 7 | namespace vectordb { 8 | 9 | static const nuraft::raft_params::return_method_type CALL_TYPE 10 | = nuraft::raft_params::blocking; 11 | // = nuraft::raft_params::async_handler; 12 | class RaftStuff { 13 | public: 14 | RaftStuff(int node_id, std::string &endpoint, int port, VectorDatabase *vector_database); 15 | 16 | void Init(); 17 | auto AddSrv(int srv_id, const std::string &srv_endpoint) -> bool; 18 | void EnableElectionTimeout(int lower_bound, int upper_bound); // Takes the lower and upper election-timeout bounds 19 | auto IsLeader() const -> bool; // Leadership query; declaration only 20 | auto GetAllNodesInfo() const -> std::vector>; 21 | auto GetCurrentNodesInfo() const -> std::tuple; 22 | auto GetNodeStatus(int node_id) const -> std::string; // Node status lookup by id; declaration only 23 | void AppendEntries(const std::string &entry); 24 | auto GetSrvConfig(int srv_id) -> nuraft::ptr; 25 | void HandleResult(nuraft::cmd_result< nuraft::ptr >& result); 26 | private: 27 | 28 | private: 29 | int node_id_; 30 | std::string endpoint_; 31 | nuraft::ptr smgr_; 32 | nuraft::ptr sm_; 33 | int port_; 34 | nuraft::raft_launcher launcher_; 35 | nuraft::ptr raft_instance_; 36 | VectorDatabase *vector_database_; // Raw VectorDatabase pointer passed in through the constructor signature above 37 | }; 38 | 39 | } // namespace vectordb -------------------------------------------------------------------------------- /src/include/common/constants.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | namespace vectordb { 3 | #define LOGGER_NAME "GlobalLogger" 4 | 5 | #define RESPONSE_VECTORS "vectors" 6 | #define RESPONSE_DISTANCES "distances" 7 | 8 | #define REQUEST_VECTORS "vectors" 9 | #define REQUEST_K "k" 10 | #define REQUEST_ID "id" 11 | #define REQUEST_INDEX_TYPE "indexType" 12 | #define INSTANCE_ID "instanceId" 13 | #define NODE_ID "nodeId" 14 | 15 | #define RESPONSE_RETCODE "retCode" // Response field name for the return code 16 
| #define RESPONSE_RETCODE_SUCCESS 0 17 | #define RESPONSE_RETCODE_ERROR (-1) 18 | 19 | #define RESPONSE_ERROR_MSG "errorMsg" // 添加宏定义 20 | 21 | #define RESPONSE_CONTENT_TYPE_JSON "application/json" 22 | #define RESPONSE_CONTENT_TYPE_TEXT "text/plain" 23 | 24 | #define INDEX_TYPE_FLAT "FLAT" // 添加宏定义 25 | #define INDEX_TYPE_HNSW "HNSW" // 添加宏定义 26 | 27 | // 其他字符串常量... 28 | } // namespace vectordb -------------------------------------------------------------------------------- /src/include/common/master_cfg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "common/vector_cfg.h" 9 | #include "common/vector_utils.h" 10 | #include "spdlog/spdlog.h" 11 | namespace vectordb { 12 | 13 | class MasterCfg : public Singleton { 14 | friend class Singleton; 15 | 16 | public: 17 | // Need call SetCfg first 18 | static void SetCfg(const std::string &path) { 19 | assert(cfg_path.empty() && !path.empty()); 20 | cfg_path = path; 21 | } 22 | 23 | auto Port() const noexcept -> int { return master_port_; } 24 | auto Address() const noexcept -> const std::string & { return master_host_; } 25 | auto EtcdEndpoints() const noexcept -> const std::string & { return etcd_endpoints_; } 26 | 27 | auto GlogName() const noexcept -> const std::string & { return m_log_cfg_.m_glog_name_; } 28 | auto GlogLevel() const noexcept -> spdlog::level::level_enum { 29 | return static_cast(m_log_cfg_.m_level_); 30 | } 31 | 32 | private: 33 | MasterCfg() { ParseCfgFile(cfg_path); } 34 | 35 | void ParseCfgFile(const std::string &path); 36 | 37 | private: 38 | int master_port_; 39 | std::string master_host_; 40 | std::string etcd_endpoints_; 41 | 42 | LogCfg m_log_cfg_; 43 | 44 | static std::string cfg_path; 45 | }; 46 | } // namespace vectordb 47 | -------------------------------------------------------------------------------- /src/include/common/proxy_cfg.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "common/vector_cfg.h" 9 | #include "common/vector_utils.h" 10 | #include "spdlog/spdlog.h" 11 | namespace vectordb { 12 | 13 | class ProxyCfg : public Singleton { 14 | friend class Singleton; 15 | 16 | public: 17 | // SetCfg must be called before the first Instance() call: the constructor parses cfg_path 18 | static void SetCfg(const std::string &path) { 19 | assert(cfg_path.empty() && !path.empty()); 20 | cfg_path = path; 21 | } 22 | auto InstanceId() const noexcept -> int { return instance_id_; } 23 | auto Port() const noexcept -> int { return proxy_port_; } 24 | auto Address() const noexcept -> const std::string & { return proxy_host_; } 25 | 26 | auto MasterPort() const noexcept -> int { return master_port_; } 27 | auto MasterHost() const noexcept -> const std::string & { return master_host_; } 28 | auto ReadPaths() const noexcept -> const std::set & { return read_paths_; } 29 | auto WritePaths() const noexcept -> const std::set & { return write_paths_; } 30 | auto GlogName() const noexcept -> const std::string & { return m_log_cfg_.m_glog_name_; } 31 | auto GlogLevel() const noexcept -> spdlog::level::level_enum { 32 | return m_log_cfg_.m_level_; // already a spdlog::level::level_enum, no cast needed 33 | } 34 | 35 | private: 36 | ProxyCfg() { ParseCfgFile(cfg_path); } 37 | 38 | void ParseCfgFile(const std::string &path); 39 | 40 | private: 41 | int instance_id_{0}; // integer members are zero-initialized so the accessors never read indeterminate values 42 | int proxy_port_{0}; 43 | std::string proxy_host_; 44 | int master_port_{0}; 45 | std::string master_host_; 46 | std::set read_paths_; // Set of paths treated as read requests 47 | std::set write_paths_; // Set of paths treated as write requests 48 | 49 | LogCfg m_log_cfg_; 50 | 51 | static std::string cfg_path; 52 | }; 53 | } // namespace vectordb 54 | -------------------------------------------------------------------------------- /src/include/common/vector_cfg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 
// Logging settings: the logger name and the spdlog level, which defaults to
// debug.
struct LogCfg {
  std::string m_glog_name_;
  spdlog::level::level_enum m_level_{spdlog::level::level_enum::debug};
};

// Raft settings of one node: its id, endpoint and port.
// The two ints have no default initializer.
struct RaftCfg {
  int node_id_;
  std::string endpoint_;
  int port_;
};
// Builds the path of a configuration file located in the code base directory.
//
// The directory is read from the VECTORDB_CODE_BASE environment variable and
// the result is "<VECTORDB_CODE_BASE>/<config_name>".
//
// @param config_name  File name (or relative path) of the configuration file.
// @return The joined path. When VECTORDB_CODE_BASE is not set, `config_name`
//         is returned unchanged so the caller resolves it against the current
//         working directory; previously this case passed a null pointer to
//         strlen()/strcat(), which is undefined behaviour.
inline auto GetCfgPath(const std::string &config_name) -> std::string {
  const char *code_base = std::getenv("VECTORDB_CODE_BASE");
  if (code_base == nullptr) {
    return config_name;
  }

  // std::string owns the storage, so nothing is leaked (the old malloc'ed
  // buffer was never freed).
  std::string path(code_base);
  path += '/';
  path += config_name;
  return path;
}
// Storage for the scalar (JSON) part of a record, keyed by a numeric id and
// backed by a RocksDB instance held through a raw pointer.
class ScalarStorage {
 public:
  // Constructor; opens RocksDB (per the original comment) at `db_path`.
  explicit ScalarStorage(const std::string& db_path);

  // Destructor; closes RocksDB (per the original comment).
  ~ScalarStorage();

  // Insert function: stores the JSON document `data` under `id`.
  void InsertScalar(uint64_t id, const rapidjson::Document& data);

  // Lookup by id; the result is returned as a JSON document.
  auto GetScalar(uint64_t id) -> rapidjson::Document;

 private:
  // RocksDB instance.
  // NOTE(review): the class holds a raw pointer and declares a destructor but
  // no copy operations -- confirm instances are never copied.
  rocksdb::DB* db_;
};
// Implementation of the nvm::AdminService RPC interface.
//
// The object stores the two pointers it is given and does not delete them
// (the destructor is defaulted).
class AdminServiceImpl : public nvm::AdminService, public BaseServiceImpl {
 public:
  // Stores the database and raft handles used by the handlers.
  explicit AdminServiceImpl(VectorDatabase *database, RaftStuff *raft_stuff)
      : vector_database_(database), raft_stuff_(raft_stuff){};
  ~AdminServiceImpl() override = default;

  // RPC handlers overriding nvm::AdminService. The request/response message
  // parameters are unnamed here; handlers receive `controller` and `done`.
  void snapshot(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
                ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void SetLeader(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
                 ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;
  void AddFollower(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
                   ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;
  void ListNode(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
                ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;
  void GetNode(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
               ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

 private:
  VectorDatabase *vector_database_ = nullptr;
  RaftStuff *raft_stuff_ = nullptr;
};
// brpc server of a database node; owns the user and admin service objects.
class HttpServer: public brpc::Server {
 public:
  // Takes the database and raft handles; returns a bool status.
  auto Init(VectorDatabase *vector_database,RaftStuff *raft_stuff) -> bool;

 private:

  // NOTE(review): these two raw pointers have no default initializer, unlike
  // the matching members of AdminServiceImpl/UserServiceImpl.
  VectorDatabase* vector_database_;
  RaftStuff *raft_stuff_;
  // NOTE(review): the unique_ptr template arguments appear to have been lost
  // in this copy of the file.
  std::unique_ptr user_service_impl_;
  std::unique_ptr admin_service_impl_;
};

// brpc server of the master node; owns the master service object.
class MasterServer : public brpc::Server {
 public:
  ~MasterServer();
  // Takes the etcd endpoints string; returns a bool status.
  auto Init(const std::string &etcdEndpoints) -> bool;
  void StartNodeUpdateTimer();

 private:
  std::unique_ptr master_service_impl_;
  bool running_;  // controls whether the timer thread keeps running
};
"database/vector_database.h" 11 | #include "gmock/gmock.h" 12 | #include "http.pb.h" 13 | #include "httpserver/base_service_impl.h" 14 | #include "index/faiss_index.h" 15 | #include "index/index_factory.h" 16 | namespace vectordb { 17 | 18 | enum class ServerRole { Master, Backup }; 19 | 20 | struct ServerInfo { 21 | std::string url_; 22 | ServerRole role_; 23 | auto ToJson() const -> rapidjson::Document; 24 | static auto FromJson(const rapidjson::Document &value) -> ServerInfo; 25 | }; 26 | 27 | struct Partition { 28 | uint64_t partition_id_; 29 | uint64_t node_id_; 30 | }; 31 | 32 | struct PartitionConfig { 33 | std::string partition_key_; 34 | int number_of_partitions_; 35 | std::list partitions_; // 使用 std::list 存储分区信息 36 | }; 37 | 38 | class MasterServiceImpl : public nvm::MasterService, public BaseServiceImpl { 39 | public: 40 | explicit MasterServiceImpl(const std::string &etcdEndpoints) : etcd_client_(etcdEndpoints){}; 41 | 42 | ~MasterServiceImpl() override = default; 43 | 44 | void GetNodeInfo(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 45 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override; 46 | 47 | void AddNode(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 48 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override; 49 | 50 | void RemoveNode(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 51 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override; 52 | 53 | void GetInstance(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 54 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override; 55 | 56 | void GetPartitionConfig(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/, 57 | ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override; 58 | 
// brpc server of the proxy; owns the proxy service object and a CURL handle.
//
// NOTE(review): template arguments on `std::set` / `std::unique_ptr` appear to
// have been lost in this copy of the file.
class ProxyServer : public brpc::Server {
 public:
  ~ProxyServer();
  // Takes the master server location, the instance id and the read/write path
  // sets; returns a bool status.
  auto Init(const std::string &masterServerHost, int masterServerPort, int instanceId,
            const std::set &read_path, const std::set &write_paths) -> bool;

 private:
  void InitCurl();
  void CleanupCurl();
  void FetchAndUpdateNodes();  // fetch and update the node information
  void FetchAndUpdatePartitionConfig();
  void StartNodeUpdateTimer();  // start the node update timer
  void StartPartitionUpdateTimer();

 private:
  int instance_id_;                 // instance id this proxy server belongs to
  std::string master_server_host_;  // host address of the master server
  int master_server_port_;          // port of the master server
  bool running_;                    // controls whether the timer threads keep running
  std::set read_paths_;             // set of paths treated as read requests
  std::set write_paths_;            // set of paths treated as write requests
  CURL *curl_handle_;

  std::unique_ptr proxy_service_impl_;
};
// Node information.
struct NodeInfo {
  uint64_t node_id_;
  std::string url_;
  int role_;  // e.g. 0 means master node, 1 means replica node
};

// All nodes that share one partition id.
struct NodePartitionInfo {
  uint64_t partition_id_;
  std::vector nodes_;  // every node with the same partition id
};

// Partitioning scheme as seen by the proxy.
struct NodePartitionConfig {
  std::string partition_key_;  // partition key
  int number_of_partitions_;   // number of partitions
  std::map nodes_info_;
};

// Implementation of the nvm::ProxyService RPC interface.
//
// NOTE(review): template arguments (std::set, std::vector, std::map,
// std::atomic, std::unique_ptr) appear to have been lost in this copy of the
// file -- confirm against the repo.
class ProxyServiceImpl : public nvm::ProxyService, public BaseServiceImpl {
 public:
  // Stores the path sets, the master server location, the instance id and the
  // CURL handle. The sets and the host string are taken by value and moved in.
  // NOTE(review): active_nodes_index_, next_node_index_ and
  // active_partition_index_ are not in this initializer list -- confirm they
  // are given a value before first use.
  explicit ProxyServiceImpl(std::set read_path, std::set write_paths,
                            std::string masterServerHost, int masterServerPort, const int &instanceId,
                            CURL *curl_handle)
      : instance_id_(instanceId),
        master_server_host_(std::move(masterServerHost)),
        master_server_port_(masterServerPort),
        read_paths_(std::move(read_path)),
        write_paths_(std::move(write_paths)),
        curl_handle_(curl_handle){};

  ~ProxyServiceImpl() override = default;

  // RPC handlers overriding nvm::ProxyService.
  void search(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
              ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void upsert(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
              ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void topology(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
                ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void FetchAndUpdateNodes();
  void FetchAndUpdatePartitionConfig();

 private:
  // Internal helpers, defined out of line.
  void ForwardRequest(brpc::Controller *cntl, ::google::protobuf::Closure *done, const std::string &path);
  auto ExtractPartitionKeyValue(const std::string &req, std::string &partitionKeyValue) -> bool;
  auto CalculatePartitionId(const std::string &partitionKeyValue) -> int;
  auto SendRequestToPartition(brpc::Controller *cntl, const std::string& path, int partitionId) -> std::unique_ptr;
  void BroadcastRequestToAllPartitions(brpc::Controller *cntl, const std::string& path);
  auto SelectTargetNode(brpc::Controller *cntl, int partitionId, const std::string& path, NodeInfo& targetNode) -> bool;
  void ForwardToTargetNode(brpc::Controller *cntl, const std::string& path, const NodeInfo& targetNode);
  void ProcessAndRespondToBroadcast(brpc::Controller *cntl, const std::vector< std::unique_ptr>& allResponses, uint k);

 private:
  int instance_id_;                  // instance id this proxy server belongs to
  std::string master_server_host_;   // host address of the master server
  int master_server_port_;           // port of the master server
  std::vector nodes_[2];             // two arrays are used
  std::atomic active_nodes_index_;   // index of the currently active array
  std::atomic next_node_index_;      // round-robin index
  std::mutex nodes_mutex_;           // guards thread-safe access to the node information
  std::set read_paths_;              // set of paths treated as read requests
  std::set write_paths_;             // set of paths treated as write requests
  CURL *curl_handle_;

  NodePartitionConfig node_partitions_[2];  // two arrays, updated alternately without a lock
  std::atomic active_partition_index_;      // index of the currently active partition array
};
// Implementation of the nvm::UserService RPC interface.
//
// The object stores the two pointers it is given and does not delete them
// (the destructor is defaulted).
class UserServiceImpl : public nvm::UserService, public BaseServiceImpl {
 public:
  // Stores the database and raft handles used by the handlers.
  explicit UserServiceImpl(VectorDatabase *database, RaftStuff *raft_stuff)
      : vector_database_(database), raft_stuff_(raft_stuff){};
  ~UserServiceImpl() override = default;

  // RPC handlers overriding nvm::UserService. The request/response message
  // parameters are unnamed here; handlers receive `controller` and `done`.
  void search(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
              ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void insert(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
              ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void upsert(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
              ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

  void query(::google::protobuf::RpcController *controller, const ::nvm::HttpRequest * /*request*/,
             ::nvm::HttpResponse * /*response*/, ::google::protobuf::Closure *done) override;

 private:
  VectorDatabase *vector_database_ = nullptr;
  RaftStuff *raft_stuff_ = nullptr;
};
// Thin wrapper around a faiss::Index pointer.
//
// NOTE(review): the element types of the std::vector / std::pair parameters
// and return values appear to have been lost in this copy of the file.
class FaissIndex {
 public:
  // Wraps `index`; the pointer is stored as-is.
  explicit FaissIndex(faiss::Index* index);

  // Adds one vector (`data`) under the id `label`.
  void InsertVectors(const std::vector& data, int64_t label);

  // Searches the `k` nearest neighbours for the query vector(s).
  // When `bitmap` is non-null, only ids contained in it are eligible.
  // Returns the pair {ids, distances}.
  auto SearchVectors(const std::vector& query, int k, const roaring_bitmap_t* bitmap = nullptr) -> std::pair, std::vector>;

  // Removes the vectors with the given ids.
  void RemoveVectors(const std::vector& ids);

  // Writes the index to `file_path`.
  void SaveIndex(const std::string& file_path);

  // Replaces the wrapped index with the one read from `file_path`; returns
  // nothing (the earlier comment about returning faiss::Index* was stale).
  void LoadIndex(const std::string& file_path);

 private:
  faiss::Index* index_;
};
// Wrapper around an hnswlib HierarchicalNSW index.
//
// NOTE(review): template arguments (std::vector, std::pair, static_cast and
// the hnswlib class templates) appear to have been lost in this copy of the
// file -- confirm against the repo.
class HNSWLibIndex {
 public:
  // Constructor. `metric` is the third parameter; `M` and `ef_construction`
  // default to 16 and 200.
  HNSWLibIndex(int dim, int num_data, IndexFactory::MetricType metric, int M = 16, int ef_construction = 200);

  // Inserts a vector under `label`.
  void InsertVectors(const std::vector& data, int64_t label);

  // Searches for the `k` nearest vectors. `bitmap`, when given, is an id
  // filter; `ef_search` defaults to 50.
  auto SearchVectors(const std::vector& query, int k, const roaring_bitmap_t* bitmap = nullptr,int ef_search = 50) -> std::pair, std::vector>;

  void RemoveVectors(const std::vector& ids);

  void SaveIndex(const std::string& file_path);
  void LoadIndex(const std::string& file_path);

  // hnswlib filter functor that accepts a label only when the roaring bitmap
  // contains it. The bitmap pointer is stored, not copied.
  class RoaringBitmapIDFilter : public hnswlib::BaseFilterFunctor {
   public:
    explicit RoaringBitmapIDFilter(const roaring_bitmap_t* bitmap) : bitmap_(bitmap) {}

    // True when `label` is a member of the bitmap.
    auto operator()(hnswlib::labeltype label) -> bool override {
      return roaring_bitmap_contains(bitmap_, static_cast(label));
    }

   private:
    const roaring_bitmap_t* bitmap_;
  };

 private:
  // int dim_;
  hnswlib::SpaceInterface* space_;
  hnswlib::HierarchicalNSW* index_;
  size_t max_elements_;  // maximum number of elements
};
// Singleton registry of index objects, keyed by index type.
//
// NOTE(review): template arguments on `Singleton` and `std::map` appear to
// have been lost in this copy of the file -- confirm against the repo.
class IndexFactory: public Singleton{
  friend class Singleton;
 public:
  // Kinds of index the factory knows about.
  enum class IndexType {
    FLAT,
    HNSW,
    FILTER,
    UNKNOWN = -1
  };

  // Distance metric; presumably L2 distance and inner product -- TODO confirm.
  enum class MetricType {
    L2,
    IP
  };

  // Sets up the index of the given type; `metric` defaults to L2.
  void Init(IndexType type, int dim, int num_data, MetricType metric = MetricType::L2);

  // Returns the index registered for `type` as an untyped pointer; the caller
  // has to cast it to the matching index class.
  auto GetIndex(IndexType type) const -> void*;

  // Save / load the indexes using `folder_path`.
  void SaveIndex(const std::string& folder_path);
  void LoadIndex(const std::string& folder_path);

 private:

  std::map index_map_;

};
| #include 10 | 11 | namespace vectordb { 12 | FaissIndex::FaissIndex(faiss::Index *index) : index_(index) {} 13 | 14 | auto RoaringBitmapIDSelector::is_member(int64_t id) const -> bool { 15 | return roaring_bitmap_contains(bitmap_, static_cast(id)); 16 | } 17 | 18 | void FaissIndex::InsertVectors(const std::vector &data, int64_t label) { 19 | auto id = static_cast(label); 20 | index_->add_with_ids(1, data.data(), &id); 21 | } 22 | 23 | auto FaissIndex::SearchVectors(const std::vector &query, int k, const roaring_bitmap_t *bitmap) 24 | -> std::pair, std::vector> { 25 | int dim = index_->d; 26 | int num_queries = query.size() / dim; 27 | std::vector indices(num_queries * k); 28 | std::vector distances(num_queries * k); 29 | 30 | // 如果传入了 bitmap 参数,则使用 RoaringBitmapIDSelector 初始化 faiss::SearchParameters 对象 31 | faiss::SearchParameters search_params; 32 | RoaringBitmapIDSelector selector(bitmap); 33 | if (bitmap != nullptr) { 34 | search_params.sel = &selector; 35 | } 36 | 37 | index_->search(num_queries, query.data(), k, distances.data(), indices.data(),&search_params); 38 | 39 | global_logger->debug("Retrieved values:"); 40 | for (size_t i = 0; i < indices.size(); ++i) { 41 | if (indices[i] != -1) { 42 | global_logger->debug("ID: {}, Distance: {}", indices[i], distances[i]); 43 | } else { 44 | global_logger->debug("No specific value found"); 45 | } 46 | } 47 | return {indices, distances}; 48 | } 49 | 50 | void FaissIndex::RemoveVectors(const std::vector &ids) { // 添加remove_vectors函数实现 51 | auto *id_map = dynamic_cast(index_); 52 | if (index_ != nullptr) { 53 | // 初始化IDSelectorBatch对象 54 | faiss::IDSelectorBatch selector(ids.size(), ids.data()); 55 | auto remove_size = id_map->remove_ids(selector); 56 | global_logger->debug("remove size = {}", remove_size); 57 | } else { 58 | throw std::runtime_error("Underlying Faiss index is not an IndexIDMap"); 59 | } 60 | } 61 | 62 | void FaissIndex::SaveIndex(const std::string& file_path) { // 添加 saveIndex 方法实现 63 | 
faiss::write_index(index_, file_path.c_str()); 64 | } 65 | 66 | void FaissIndex::LoadIndex(const std::string& file_path) { // 添加 loadIndex 方法实现 67 | std::ifstream file(file_path); // 尝试打开文件 68 | if (file.good()) { // 检查文件是否存在 69 | file.close(); 70 | delete index_; 71 | index_ = faiss::read_index(file_path.c_str()); 72 | } else { 73 | global_logger->warn("File not found: {}. Skipping loading index.", file_path); 74 | } 75 | } 76 | 77 | } // namespace vectordb -------------------------------------------------------------------------------- /src/index/filter_index.cpp: -------------------------------------------------------------------------------- 1 | #include "index/filter_index.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "logger/logger.h" 9 | #include "snappy.h" 10 | namespace vectordb { 11 | 12 | vectordb::FilterIndex::FilterIndex() = default; 13 | 14 | void FilterIndex::AddIntFieldFilter(const std::string &fieldname, int64_t value, uint64_t id) { 15 | roaring_bitmap_t *bitmap = roaring_bitmap_create(); 16 | roaring_bitmap_add(bitmap, id); 17 | int_field_filter_[fieldname][value] = bitmap; 18 | global_logger->debug("Added int field filter: fieldname={}, value={}, id={}", fieldname, value, id); // 添加打印信息 19 | } 20 | 21 | void FilterIndex::UpdateIntFieldFilter(const std::string &fieldname, int64_t *old_value, int64_t new_value, 22 | uint64_t id) { // 将 old_value 参数更改为指针类型 23 | if (old_value != nullptr) { 24 | global_logger->debug("Updated int field filter: fieldname={}, old_value={}, new_value={}, id={}", fieldname, 25 | *old_value, new_value, id); 26 | } else { 27 | global_logger->debug("Updated int field filter: fieldname={}, old_value=nullptr, new_value={}, id={}", fieldname, 28 | new_value, id); 29 | } 30 | 31 | auto it = int_field_filter_.find(fieldname); 32 | if (it != int_field_filter_.end()) { 33 | std::map &value_map = it->second; 34 | 35 | // 查找旧值对应的位图,并从位图中删除 ID 36 | auto old_bitmap_it = 37 | (old_value != 
nullptr) ? value_map.find(*old_value) : value_map.end(); // 使用解引用的 old_value 38 | if (old_bitmap_it != value_map.end()) { 39 | roaring_bitmap_t *old_bitmap = old_bitmap_it->second; 40 | roaring_bitmap_remove(old_bitmap, id); 41 | } 42 | 43 | // 查找新值对应的位图,如果不存在则创建一个新的位图 44 | auto new_bitmap_it = value_map.find(new_value); 45 | if (new_bitmap_it == value_map.end()) { 46 | roaring_bitmap_t *new_bitmap = roaring_bitmap_create(); 47 | value_map[new_value] = new_bitmap; 48 | new_bitmap_it = value_map.find(new_value); 49 | } 50 | 51 | roaring_bitmap_t *new_bitmap = new_bitmap_it->second; 52 | roaring_bitmap_add(new_bitmap, id); 53 | } else { 54 | AddIntFieldFilter(fieldname, new_value, id); 55 | } 56 | } 57 | 58 | void FilterIndex::GetIntFieldFilterBitmap(const std::string &fieldname, Operation op, int64_t value, 59 | roaring_bitmap_t *result_bitmap) { // 添加 result_bitmap 参数 60 | auto it = int_field_filter_.find(fieldname); 61 | if (it != int_field_filter_.end()) { 62 | auto &value_map = it->second; 63 | 64 | if (op == Operation::EQUAL) { 65 | auto bitmap_it = value_map.find(value); 66 | if (bitmap_it != value_map.end()) { 67 | global_logger->debug("Retrieved EQUAL bitmap for fieldname={}, value={}", fieldname, value); 68 | roaring_bitmap_overwrite(result_bitmap, bitmap_it->second); // 更新 result_bitmap 69 | } 70 | } else if (op == Operation::NOT_EQUAL) { 71 | for (const auto &entry : value_map) { 72 | if (entry.first != value) { 73 | roaring_bitmap_overwrite(result_bitmap, entry.second); // 更新 result_bitmap 74 | } 75 | } 76 | global_logger->debug("Retrieved NOT_EQUAL bitmap for fieldname={}, value={}", fieldname, value); 77 | } 78 | } 79 | } 80 | 81 | auto FilterIndex::SerializeIntFieldFilter() -> std::string { 82 | std::ostringstream oss; 83 | 84 | for (const auto &field_entry : int_field_filter_) { 85 | const std::string &field_name = field_entry.first; 86 | const std::map &value_map = field_entry.second; 87 | 88 | for (const auto &value_entry : value_map) { 89 | 
int64_t value = value_entry.first; 90 | const roaring_bitmap_t *bitmap = value_entry.second; 91 | 92 | // 将位图序列化为字节数组 93 | uint32_t size = roaring_bitmap_portable_size_in_bytes(bitmap); 94 | char *serialized_bitmap = new char[size]; 95 | roaring_bitmap_portable_serialize(bitmap, serialized_bitmap); 96 | 97 | // 将字段名、值和序列化的位图写入输出流 98 | oss << field_name << "|" << value << "|"; 99 | oss.write(serialized_bitmap, size); 100 | 101 | delete[] serialized_bitmap; 102 | } 103 | } 104 | 105 | return oss.str(); 106 | } 107 | 108 | void FilterIndex::DeserializeIntFieldFilter(const std::string &serialized_data) { 109 | std::istringstream iss(serialized_data); 110 | 111 | // 是否需要clear int_field_filter_? 反正调用前先clear了 112 | 113 | std::string line; 114 | while (std::getline(iss, line)) { 115 | std::istringstream line_iss(line); 116 | 117 | // 从输入流中读取字段名、值和序列化的位图 118 | std::string field_name; 119 | std::getline(line_iss, field_name, '|'); 120 | 121 | std::string value_str; 122 | std::getline(line_iss, value_str, '|'); 123 | int64_t value = std::stol(value_str); 124 | 125 | // 读取序列化的位图 126 | std::string serialized_bitmap(std::istreambuf_iterator(line_iss), {}); 127 | 128 | // 反序列化位图 129 | roaring_bitmap_t *bitmap = roaring_bitmap_portable_deserialize(serialized_bitmap.data()); 130 | 131 | // 将反序列化的位图插入 intFieldFilter 132 | int_field_filter_[field_name][value] = bitmap; 133 | } 134 | } 135 | 136 | void FilterIndex::SaveIndex(const std::string &path) { // 添加 key 参数 137 | std::string serialized_data = SerializeIntFieldFilter(); 138 | std::fstream index_file; // 将 wal_log_file_ 类型更改为 std::fstream 139 | 140 | // 将序列化的数据存储到文件 141 | 142 | if (!std::filesystem::exists(path)) { 143 | // 文件不存在,先创建文件 144 | std::ofstream temp_file(path); 145 | temp_file.close(); 146 | } 147 | 148 | index_file.open(path, std::ios::in |std::ios::out |std::ios::trunc); // 以 std::ios::in | std::ios::out | std::ios::app 模式打开文件 149 | if (!index_file.is_open()) { 150 | global_logger->error("An error occurred while 
writing the filter index entry. Reason: {}", 151 | std::strerror(errno)); // 使用日志打印错误消息和原因 152 | throw std::runtime_error("Failed to open filter index file at path: " + path); 153 | } 154 | 155 | // 压缩日志条目 156 | std::string compressed_data; 157 | snappy::Compress(serialized_data.c_str(), serialized_data.size(), &compressed_data); 158 | 159 | // 写入压缩后的日志条目到文件 160 | index_file << compressed_data << std::endl; 161 | 162 | if (index_file.fail()) { // 检查是否发生错误 163 | global_logger->error("An error occurred while writing the filter index file. Reason: {}", 164 | std::strerror(errno)); // 使用日志打印错误消息和原因 165 | } else { 166 | global_logger->debug("Wrote filter index file"); // 打印日志 167 | index_file.flush(); // 强制持久化 168 | } 169 | index_file.close(); 170 | } 171 | 172 | void FilterIndex::LoadIndex(const std::string &path) { // 添加 key 参数 173 | std::string compressed_line; 174 | std::fstream index_file; 175 | if (!std::filesystem::exists(path)) { 176 | // 文件不存在,先创建文件 177 | std::ofstream temp_file(path); 178 | temp_file.close(); 179 | } 180 | 181 | index_file.open(path, std::ios::in | std::ios::out | 182 | std::ios::app); // 以 std::ios::in | std::ios::out | std::ios::app 模式打开文件 183 | if (!index_file.is_open()) { 184 | global_logger->error("An error occurred while load the filter index entry. 
Reason: {}", 185 | std::strerror(errno)); // 使用日志打印错误消息和原因 186 | throw std::runtime_error("Failed to load filter index file at path: " + path); 187 | } 188 | std::string decompressed_data; 189 | if (std::getline(index_file, compressed_line)) { 190 | 191 | if (!snappy::Uncompress(compressed_line.c_str(), compressed_line.size(), &decompressed_data)) { 192 | global_logger->error("Failed to decompress WAL log entry"); 193 | return; 194 | } 195 | } else { 196 | index_file.clear(); 197 | global_logger->debug("No more filter index file to read"); 198 | } 199 | index_file.close(); 200 | // 从序列化的数据中反序列化 intFieldFilter 201 | int_field_filter_.clear(); 202 | DeserializeIntFieldFilter(decompressed_data); 203 | } 204 | 205 | } // namespace vectordb -------------------------------------------------------------------------------- /src/index/hnswlib_index.cpp: -------------------------------------------------------------------------------- 1 | #include "index/hnswlib_index.h" 2 | #include 3 | #include 4 | #include "logger/logger.h" 5 | namespace vectordb { 6 | 7 | HNSWLibIndex::HNSWLibIndex(int dim, int num_data, IndexFactory::MetricType metric, int M, int ef_construction):max_elements_(num_data) 8 | { // 将MetricType参数修改为第三个参数 9 | // bool normalize = false; 10 | if (metric == IndexFactory::MetricType::L2) { 11 | space_ = new hnswlib::L2Space(dim); 12 | } else { 13 | throw std::runtime_error("Invalid metric type."); 14 | } 15 | index_ = new hnswlib::HierarchicalNSW(space_, num_data, M, ef_construction); 16 | } 17 | 18 | void HNSWLibIndex::InsertVectors(const std::vector& data, int64_t label) { 19 | assert(index_ != nullptr); 20 | index_->addPoint(data.data(), label); 21 | } 22 | 23 | // 找到最多K个 可能不满K个 不满的都是label distance 为-1 24 | auto HNSWLibIndex::SearchVectors(const std::vector& query, int k,const roaring_bitmap_t* bitmap , int ef_search) -> std::pair, std::vector> { // 修改返回类型 25 | assert(index_ != nullptr); 26 | index_->setEf(ef_search); 27 | 28 | RoaringBitmapIDFilter* selector 
= nullptr; 29 | if (bitmap != nullptr) { 30 | selector = new RoaringBitmapIDFilter(bitmap); 31 | } 32 | 33 | auto result = index_->searchKnn(query.data(), k,selector); 34 | 35 | std::vector indices(k,-1); 36 | std::vector distances(k,-1); 37 | int j = 0; 38 | global_logger->debug("Retrieved values:"); 39 | while(!result.empty()){ 40 | auto item = result.top(); 41 | indices[j] = item.second; 42 | distances[j] = item.first; 43 | result.pop(); 44 | global_logger->debug("ID: {}, Distance: {}", indices[j], distances[j]); 45 | j++; 46 | if(j == k){ 47 | break; 48 | } 49 | } 50 | global_logger->debug("HNSW index found {} vectors",j); 51 | 52 | if (bitmap != nullptr) { 53 | delete selector; 54 | } 55 | return {indices, distances}; 56 | } 57 | 58 | void HNSWLibIndex::RemoveVectors(const std::vector& ids) { // 添加RemoveVectors函数实现 59 | assert(index_ != nullptr); 60 | for(const auto &id:ids){ 61 | index_->markDelete(id); 62 | } 63 | } 64 | 65 | void HNSWLibIndex::SaveIndex(const std::string& file_path) { // 添加 saveIndex 方法实现 66 | index_->saveIndex(file_path); 67 | } 68 | 69 | void HNSWLibIndex::LoadIndex(const std::string& file_path) { // 添加 loadIndex 方法实现 70 | std::ifstream file(file_path); // 尝试打开文件 71 | if (file.good()) { // 检查文件是否存在 72 | file.close(); 73 | index_->loadIndex(file_path, space_, max_elements_); 74 | } else { 75 | global_logger->warn("File not found: {}. Skipping loading index.", file_path); 76 | } 77 | } 78 | 79 | } // namespace vectordb -------------------------------------------------------------------------------- /src/index/index_factory.cpp: -------------------------------------------------------------------------------- 1 | #include "index/index_factory.h" 2 | #include "index/hnswlib_index.h" 3 | #include "index/filter_index.h" 4 | namespace vectordb { 5 | 6 | void IndexFactory::Init(IndexType type, int dim, int num_data,MetricType metric) { 7 | faiss::MetricType faiss_metric = (metric == MetricType::L2) ? 
faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT; 8 | 9 | switch (type) { 10 | case IndexType::FLAT: 11 | index_map_[type] = new vectordb::FaissIndex(new faiss::IndexIDMap(new faiss::IndexFlat(dim, faiss_metric))); 12 | break; 13 | case IndexType::HNSW: 14 | index_map_[type] = new vectordb::HNSWLibIndex(dim, num_data, metric, 16, 200); 15 | break; 16 | case IndexType::FILTER: // 初始化 FilterIndex 对象 17 | index_map_[type] = new FilterIndex(); 18 | break; 19 | default: 20 | break; 21 | } 22 | } 23 | 24 | auto IndexFactory::GetIndex(IndexType type) const -> void* { 25 | auto it = index_map_.find(type); 26 | if (it != index_map_.end()) { 27 | return it->second; 28 | } 29 | return nullptr; 30 | } 31 | 32 | void IndexFactory::SaveIndex(const std::string& folder_path) { // 添加 ScalarStorage 参数 33 | 34 | for (const auto& index_entry : index_map_) { 35 | IndexType index_type = index_entry.first; 36 | void* index = index_entry.second; 37 | 38 | // 为每个索引类型生成一个文件名 39 | std::string file_path = folder_path + std::to_string(static_cast(index_type)) + ".index"; 40 | 41 | // 根据索引类型调用相应的 saveIndex 函数 42 | if (index_type == IndexType::FLAT) { 43 | static_cast(index)->SaveIndex(file_path); 44 | } else if (index_type == IndexType::HNSW) { 45 | static_cast(index)->SaveIndex(file_path); 46 | } else if (index_type == IndexType::FILTER) { // 保存 FilterIndex 类型的索引 47 | static_cast(index)->SaveIndex(file_path); 48 | } 49 | } 50 | } 51 | 52 | void IndexFactory::LoadIndex(const std::string& folder_path) { // 添加 loadIndex 方法实现 53 | for (const auto& index_entry : index_map_) { 54 | IndexType index_type = index_entry.first; 55 | void* index = index_entry.second; 56 | 57 | // 为每个索引类型生成一个文件名 58 | std::string file_path = folder_path + std::to_string(static_cast(index_type)) + ".index"; 59 | 60 | // 根据索引类型调用相应的 loadIndex 函数 61 | if (index_type == IndexType::FLAT) { 62 | static_cast(index)->LoadIndex(file_path); 63 | } else if (index_type == IndexType::HNSW) { 64 | static_cast(index)->LoadIndex(file_path); 
65 | } else if (index_type == IndexType::FILTER) { // 加载 FilterIndex 类型的索引 66 | static_cast(index)->LoadIndex(file_path); 67 | } 68 | } 69 | } 70 | 71 | } // namespace vectordb -------------------------------------------------------------------------------- /src/logger/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library( 2 | vectorDB_logger 3 | OBJECT 4 | logger.cpp) 5 | 6 | set(ALL_OBJECT_FILES 7 | ${ALL_OBJECT_FILES} $ 8 | PARENT_SCOPE) -------------------------------------------------------------------------------- /src/logger/logger.cpp: -------------------------------------------------------------------------------- 1 | #include "logger/logger.h" 2 | #include 3 | #include "spdlog/sinks/stdout_color_sinks.h" 4 | 5 | namespace vectordb { 6 | std::shared_ptr global_logger; 7 | 8 | void InitGlobalLogger(const std::string &log_name) { 9 | global_logger = spdlog::stdout_color_mt(log_name); 10 | } 11 | 12 | void SetLogLevel(spdlog::level::level_enum log_level) { 13 | global_logger->set_level(log_level); 14 | } 15 | } // namespace vectordb 16 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | include(GoogleTest) 4 | 5 | file(GLOB_RECURSE VECTORDB_TEST_SOURCES "${PROJECT_SOURCE_DIR}/test/*/*test.cpp") 6 | 7 | # ##################################################################################################################### 8 | # MAKE TARGETS 9 | # ##################################################################################################################### 10 | 11 | # ######################################### 12 | # "make check-tests" 13 | # ######################################### 14 | add_custom_target(build-tests COMMAND ${CMAKE_CTEST_COMMAND} --show-only) 15 | add_custom_target(check-tests COMMAND 
${CMAKE_CTEST_COMMAND} --verbose) 16 | 17 | 18 | # ######################################### 19 | # "make XYZ_test" 20 | # ######################################### 21 | foreach (vectordb_test_source ${VECTORDB_TEST_SOURCES}) 22 | # Create a human readable name. 23 | get_filename_component(vectordb_test_filename ${vectordb_test_source} NAME) 24 | string(REPLACE ".cpp" "" vectordb_test_name ${vectordb_test_filename}) 25 | 26 | # Add the test target separately and as part of "make check-tests". 27 | #add_executable(${vectordb_test_name} EXCLUDE_FROM_ALL ${vectordb_test_source}) 28 | add_executable(${vectordb_test_name} EXCLUDE_FROM_ALL ${vectordb_test_source} "${PROJECT_SOURCE_DIR}/tools/backtrace.cpp") 29 | add_backward(${vectordb_test_name}) 30 | add_dependencies(build-tests ${vectordb_test_name}) 31 | add_dependencies(check-tests ${vectordb_test_name}) 32 | 33 | gtest_discover_tests(${vectordb_test_name} 34 | EXTRA_ARGS 35 | --gtest_output=xml:${CMAKE_BINARY_DIR}/test/${vectordb_test_name}.xml 36 | --gtest_catch_exceptions=0 37 | DISCOVERY_TIMEOUT 120 38 | PROPERTIES 39 | TIMEOUT 120 40 | ) 41 | 42 | target_link_libraries(${vectordb_test_name} vectorDB gtest gmock_main) 43 | 44 | # Set test target properties and dependencies. 
45 | set_target_properties(${vectordb_test_name} 46 | PROPERTIES 47 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test" 48 | COMMAND ${vectordb_test_name} 49 | ) 50 | endforeach () 51 | 52 | 53 | -------------------------------------------------------------------------------- /test/cfg/cfg_test.cpp: -------------------------------------------------------------------------------- 1 | #include "index/faiss_index.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "gtest/gtest.h" 9 | #include "common/vector_cfg.h" 10 | namespace vectordb { 11 | 12 | // NOLINTNEXTLINE 13 | TEST(CfgTest, SampleTest){ 14 | auto path_str = GetCfgPath(); 15 | EXPECT_EQ(path_str, "/home/zhouzj/vectorDB/vectorDB/vectordb_config"); 16 | Cfg::CfgPath(path_str); 17 | EXPECT_EQ(Cfg::Instance().RocksDbPath(), "/home/zhouzj/vectordb/storage"); 18 | EXPECT_EQ(Cfg::Instance().GlogLevel(), 1); 19 | EXPECT_EQ(Cfg::Instance().GlogName(), "my_log"); 20 | } 21 | } // namespace vectordb -------------------------------------------------------------------------------- /test/database/database_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "common/vector_init.h" 4 | #include "database/vector_database.h" 5 | #include "gtest/gtest.h" 6 | #include "index/faiss_index.h" 7 | #include "index/index_factory.h" 8 | #include 9 | namespace vectordb { 10 | // NOLINTNEXTLINE 11 | TEST(DatabaseTest, SampleTest) { 12 | Init(); 13 | std::experimental::filesystem::remove_all(Cfg::Instance().TestRocksDbPath()); 14 | VectorDatabase db(Cfg::Instance().TestRocksDbPath(),Cfg::Instance().TestWalPath()); 15 | rapidjson::Document doc; 16 | doc.SetObject(); 17 | rapidjson::Document::AllocatorType &allocator = doc.GetAllocator(); 18 | rapidjson::Value vectors(rapidjson::kArrayType); 19 | vectors.PushBack(10, allocator); 20 | doc.AddMember("vectors", vectors, allocator); 21 | 22 | IndexFactory::IndexType 
index_type = IndexFactory::IndexType::FLAT; 23 | db.Upsert(1, doc, index_type); 24 | auto res = db.Query(1); 25 | std::vector vec; 26 | vec.clear(); 27 | for (const auto &d : res["vectors"].GetArray()) { 28 | vec.push_back(d.GetFloat()); 29 | } 30 | EXPECT_EQ(int(vec[0]), 10); 31 | 32 | 33 | rapidjson::Document doc2; 34 | doc2.SetObject(); 35 | rapidjson::Document::AllocatorType &allocator2 = doc2.GetAllocator(); 36 | 37 | rapidjson::Value vectors2(rapidjson::kArrayType); 38 | vectors2.PushBack(11, allocator2); 39 | 40 | doc2.AddMember("vectors", vectors2, allocator2); 41 | 42 | db.Upsert(1, doc2, index_type); 43 | auto res2 = db.Query(1); 44 | vec.clear(); 45 | for (const auto &d : res2["vectors"].GetArray()) { 46 | vec.push_back(d.GetFloat()); 47 | } 48 | EXPECT_EQ(int(vec[0]), 11); 49 | } 50 | } // namespace vectordb -------------------------------------------------------------------------------- /test/database/scalar_storage_test.cpp: -------------------------------------------------------------------------------- 1 | #include "index/faiss_index.h" 2 | #include 3 | #include 4 | #include 5 | #include "gtest/gtest.h" 6 | #include "index/index_factory.h" 7 | #include "database/scalar_storage.h" 8 | #include "common/vector_init.h" 9 | namespace vectordb { 10 | // NOLINTNEXTLINE 11 | TEST(ScalarTest, SampleTest){ 12 | Init(); 13 | std::experimental::filesystem::remove_all(Cfg::Instance().TestRocksDbPath()); 14 | ScalarStorage storage(Cfg::Instance().TestRocksDbPath()); 15 | 16 | rapidjson::Document doc; 17 | doc.SetObject(); 18 | rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); 19 | 20 | // 添加retCode到响应 21 | doc.AddMember("value", 10, allocator); 22 | storage.InsertScalar(1,doc); 23 | auto res1 = storage.GetScalar(1); 24 | EXPECT_EQ(res1["value"],10); 25 | 26 | 27 | rapidjson::Document doc2; 28 | doc2.SetObject(); 29 | rapidjson::Document::AllocatorType& allocator2 = doc2.GetAllocator(); 30 | 31 | // 添加retCode到响应 32 | doc2.AddMember("value", 11, 
allocator2); 33 | storage.InsertScalar(1,doc2); 34 | auto res2 = storage.GetScalar(1); 35 | EXPECT_EQ(res2["value"],11); 36 | 37 | } 38 | } // namespace vectordb -------------------------------------------------------------------------------- /test/etcd/etcd_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "common/vector_init.h" 7 | #include "gtest/gtest.h" 8 | // 需要先手动开启etcd server 9 | namespace vectordb { 10 | // NOLINTNEXTLINE 11 | TEST(ETCDTest, SampleTest) { 12 | VdbServerInit(1); 13 | etcd::Client etcd("http://127.0.0.1:2379"); 14 | 15 | // 设置键值对 16 | etcd::Response response = etcd.set("foo", "bar").get(); 17 | if (response.is_ok()) { 18 | std::cout << "Key set successfully" << std::endl; 19 | } else { 20 | std::cerr << "Error: " << response.error_message() << std::endl; 21 | } 22 | EXPECT_EQ(response.is_ok(), true); 23 | 24 | // 获取键值对 25 | response = etcd.get("foo").get(); 26 | if (response.is_ok()) { 27 | std::cout << "Value: " << response.value().as_string() << std::endl; 28 | } else { 29 | std::cerr << "Error: " << response.error_message() << std::endl; 30 | } 31 | EXPECT_EQ(response.value().as_string(), "bar"); 32 | } 33 | 34 | } // namespace vectordb -------------------------------------------------------------------------------- /test/index/faiss_index_test.cpp: -------------------------------------------------------------------------------- 1 | #include "index/faiss_index.h" 2 | #include 3 | #include 4 | #include "common/vector_init.h" 5 | #include "gtest/gtest.h" 6 | #include "index/index_factory.h" 7 | namespace vectordb { 8 | // NOLINTNEXTLINE 9 | TEST(IndexTest, FaissSampleTest) { 10 | VdbServerInit(1); 11 | auto &indexfactory = IndexFactory::Instance(); 12 | auto index_type = IndexFactory::IndexType::FLAT; 13 | void *index = indexfactory.GetIndex(IndexFactory::IndexType::FLAT); 14 | EXPECT_NE(index, nullptr); 15 | 16 | // 
根据索引类型初始化索引对象并调用insert_vectors函数 17 | 18 | std::vector base_data{0.8}; 19 | uint64_t base_label = 3; 20 | switch (index_type) { 21 | case IndexFactory::IndexType::FLAT: { 22 | auto *faiss_index = static_cast(index); 23 | EXPECT_NE(index, nullptr); 24 | faiss_index->InsertVectors(base_data, base_label); 25 | break; 26 | } 27 | // 在此处添加其他索引类型的处理逻辑 28 | default: 29 | break; 30 | } 31 | 32 | std::vector query{0.8}; 33 | int k = 1; 34 | std::pair, std::vector> results; 35 | switch (index_type) { 36 | case IndexFactory::IndexType::FLAT: { 37 | auto *faiss_index = static_cast(index); 38 | results = faiss_index->SearchVectors(query, k); 39 | break; 40 | } 41 | // 在此处添加其他索引类型的处理逻辑 42 | default: 43 | break; 44 | } 45 | 46 | EXPECT_EQ(results.first.size(), 1); 47 | EXPECT_EQ(results.second.size(), 1); 48 | EXPECT_EQ(results.first.at(0), 3); 49 | 50 | switch (index_type) { 51 | case IndexFactory::IndexType::FLAT: { 52 | auto *faiss_index = static_cast(index); 53 | faiss_index->RemoveVectors({3}); 54 | break; 55 | } 56 | // 在此处添加其他索引类型的处理逻辑 57 | default: 58 | break; 59 | } 60 | 61 | std::pair, std::vector> results2; 62 | switch (index_type) { 63 | case IndexFactory::IndexType::FLAT: { 64 | auto *faiss_index = static_cast(index); 65 | results2 = faiss_index->SearchVectors(query, k); 66 | break; 67 | } 68 | // 在此处添加其他索引类型的处理逻辑 69 | default: 70 | break; 71 | } 72 | 73 | EXPECT_EQ(results2.first.at(0), -1); 74 | 75 | rapidjson::Document data; 76 | data.SetObject(); 77 | rapidjson::Document::AllocatorType &allocator = data.GetAllocator(); 78 | 79 | rapidjson::Value vectors(rapidjson::kArrayType); 80 | vectors.PushBack(0.7, allocator); 81 | 82 | data.AddMember("vectors", vectors, allocator); 83 | 84 | // 将新向量插入索引 85 | std::vector new_vector(data["vectors"].Size()); // 从JSON数据中提取vectors字段 86 | for (rapidjson::SizeType i = 0; i < data["vectors"].Size(); ++i) { 87 | new_vector[i] = data["vectors"][i].GetFloat(); 88 | } 89 | uint64_t label = 1; 90 | switch (index_type) { 91 | case 
IndexFactory::IndexType::FLAT: { 92 | auto *faiss_index = static_cast(index); 93 | faiss_index->InsertVectors(new_vector, static_cast(label)); 94 | break; 95 | } 96 | default: 97 | break; 98 | } 99 | 100 | switch (index_type) { 101 | case IndexFactory::IndexType::FLAT: { 102 | auto *faiss_index = static_cast(index); 103 | faiss_index->RemoveVectors({static_cast(label)}); // 将id转换为long类型 104 | break; 105 | } 106 | default: 107 | break; 108 | } 109 | } 110 | } // namespace vectordb -------------------------------------------------------------------------------- /test/index/filter_index_test.cpp: -------------------------------------------------------------------------------- 1 | #include "index/filter_index.h" 2 | #include 3 | #include 4 | #include "common/vector_init.h" 5 | #include "gtest/gtest.h" 6 | #include "index/index_factory.h" 7 | 8 | namespace vectordb { 9 | // NOLINTNEXTLINE 10 | TEST(IndexTest, FilterSampleTest) { 11 | Init(); 12 | auto &indexfactory = IndexFactory::Instance(); 13 | auto index_type = IndexFactory::IndexType::FILTER; 14 | void *index = indexfactory.GetIndex(index_type); 15 | EXPECT_NE(index, nullptr); 16 | 17 | auto *filter_index = static_cast(index); 18 | filter_index->AddIntFieldFilter("index", 10, 1); 19 | auto filter_bitmap = roaring_bitmap_create(); 20 | filter_index->GetIntFieldFilterBitmap("index", FilterIndex::Operation::EQUAL, 10, filter_bitmap); 21 | EXPECT_EQ(roaring_bitmap_contains(filter_bitmap, static_cast(1)), true); 22 | 23 | int64_t *old_field_value_p = nullptr; 24 | // 如果存在现有向量,则从 FilterIndex 中更新 int 类型字段 25 | old_field_value_p = static_cast(malloc(sizeof(int64_t))); 26 | *old_field_value_p = 10; 27 | filter_index->UpdateIntFieldFilter("index", old_field_value_p, 20, 1); 28 | // auto filter_bitmap2 = roaring_bitmap_create(); 29 | filter_index->GetIntFieldFilterBitmap("index", FilterIndex::Operation::EQUAL, 20, filter_bitmap); 30 | EXPECT_EQ(roaring_bitmap_contains(filter_bitmap, static_cast(1)), true); 31 | // auto 
filter_bitmap3 = roaring_bitmap_create(); 32 | filter_index->GetIntFieldFilterBitmap("index", FilterIndex::Operation::EQUAL, 10, filter_bitmap); 33 | EXPECT_EQ(roaring_bitmap_contains(filter_bitmap, static_cast(1)), false); 34 | } 35 | } // namespace vectordb -------------------------------------------------------------------------------- /test/index/hnsw_index_test.cpp: -------------------------------------------------------------------------------- 1 | #include "index/faiss_index.h" 2 | #include "index/hnswlib_index.h" 3 | #include 4 | #include 5 | #include "gtest/gtest.h" 6 | #include "index/index_factory.h" 7 | #include "common/vector_init.h" 8 | namespace vectordb { 9 | // NOLINTNEXTLINE 10 | TEST(IndexTest, HNSWSampleTest) { 11 | Init(); 12 | int dim = 1; // 向量维度 13 | auto &indexfactory = IndexFactory::Instance(); 14 | IndexFactory::IndexType index_type = IndexFactory::IndexType::HNSW; 15 | indexfactory.Init(index_type, dim,100); 16 | 17 | void *index = indexfactory.GetIndex(IndexFactory::IndexType::HNSW); 18 | EXPECT_NE(index, nullptr); 19 | 20 | // 根据索引类型初始化索引对象并调用insert_vectors函数 21 | 22 | std::vector base_data{0.8}; 23 | uint64_t base_label = 3; 24 | switch (index_type) { 25 | case IndexFactory::IndexType::HNSW: { 26 | auto *hnsw_index = static_cast(index); 27 | EXPECT_NE(index, nullptr); 28 | hnsw_index->InsertVectors(base_data, base_label); 29 | break; 30 | } 31 | // 在此处添加其他索引类型的处理逻辑 32 | default: 33 | break; 34 | } 35 | 36 | std::vector query{0.8}; 37 | int k = 1; 38 | std::pair, std::vector> results; 39 | switch (index_type) { 40 | case IndexFactory::IndexType::HNSW: { 41 | auto *hnsw_index = static_cast(index); 42 | results = hnsw_index->SearchVectors(query, k); 43 | break; 44 | } 45 | // 在此处添加其他索引类型的处理逻辑 46 | default: 47 | break; 48 | } 49 | 50 | EXPECT_EQ(results.first.size(), 1); 51 | EXPECT_EQ(results.second.size(), 1); 52 | EXPECT_EQ(results.first.at(0), 3); 53 | 54 | switch (index_type) { 55 | case IndexFactory::IndexType::HNSW: { 56 | auto 
*hnsw_index = static_cast(index); 57 | hnsw_index->RemoveVectors({3}); 58 | break; 59 | } 60 | // 在此处添加其他索引类型的处理逻辑 61 | default: 62 | break; 63 | } 64 | 65 | std::pair, std::vector> results2; 66 | switch (index_type) { 67 | case IndexFactory::IndexType::HNSW: { 68 | auto *hnsw_index = static_cast(index); 69 | results2 = hnsw_index->SearchVectors(query, k); 70 | break; 71 | } 72 | // 在此处添加其他索引类型的处理逻辑 73 | default: 74 | break; 75 | } 76 | 77 | EXPECT_EQ(results2.first.at(0), -1); 78 | 79 | 80 | 81 | } 82 | } // namespace vectordb -------------------------------------------------------------------------------- /test/test.sh: -------------------------------------------------------------------------------- 1 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.8], "id": 2, "indexType": "FLAT"}' http://localhost:7781/UserService/insert 2 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.5], "k": 2, "indexType": "FLAT"}' http://localhost:7781/UserService/search 3 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.555], "id":3, "indexType": "FLAT","Name":"hello","Ci":1111}' http://localhost:7781/UserService/upsert 4 | curl -X POST -H "Content-Type: application/json" -d '{"id": 3}' http://localhost:7781/UserService/query 5 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.999], "id":6, "int_field":47,"indexType": "FLAT"}' http://localhost:7781/UserService/upsert 6 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.888], "id":7, "int_field":48,"indexType": "FLAT"}' http://localhost:7781/UserService/upsert 7 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.999], "k": 5 , "indexType": "FLAT","filter":{"fieldName":"int_field","value":47,"op":"="}}' http://localhost:7781/UserService/search 8 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.999], "k": 5 , "indexType": "FLAT","filter":{"fieldName":"int_field","value":47,"op":"!="}}' 
http://localhost:7781/UserService/search 9 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.888], "k": 1, "indexType": "FLAT","filter":{"fieldName":"int_field","value":48,"op":"="}}' http://localhost:7781/UserService/search 10 | curl -X POST -H "Content-Type: application/json" -d '{}' http://localhost:7781/AdminService/snapshot 11 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.999], "k": 1 , "indexType": "FLAT","filter":{"fieldName":"int_field","value":47,"op":"="}}' http://localhost:7781/UserService/search 12 | curl -X POST -H "Content-Type: application/json" -d '{}' http://localhost:7781/AdminService/SetLeader 13 | curl -X GET http://localhost:7781/AdminService/ListNode 14 | curl -X GET http://localhost:7781/AdminService/GetNode 15 | curl -X POST -H "Content-Type: application/json" -d '{"nodeId": 2, "endpoint": "127.0.0.1:8082"}' http://localhost:7781/AdminService/AddFollower 16 | curl -X POST -H "Content-Type: application/json" -d '{"nodeId": 3, "endpoint": "127.0.0.1:8083"}' http://localhost:7781/AdminService/AddFollower 17 | 18 | # Get node info 19 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId" : 1,"nodeId": 1}' http://localhost:6060/MasterService/GetNodeInfo 20 | # Add node 1 21 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId": 1, "nodeId": 1, "url": "http://127.0.0.1:7781", "role": 0, "status": 0}' http://localhost:6060/MasterService/AddNode 22 | # List all nodes of the instance 23 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId" : 1}' http://localhost:6060/MasterService/GetInstance 24 | # Add node 2 25 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId": 1, "nodeId": 2, "url": "http://127.0.0.1:7782", "role": 1, "status": 0}' http://localhost:6060/MasterService/AddNode 26 | # Remove node 2 27 | curl -X DELETE -H "Content-Type: application/json" -d '{"instanceId" : 1,"nodeId": 2}' http://localhost:6060/MasterService/RemoveNode 28 | # Add node 3 29 | curl -X POST -H 
"Content-Type: application/json" -d '{"instanceId": 1, "nodeId": 3, "url": "http://127.0.0.1:7783", "role": 1, "status": 0}' http://localhost:6060/MasterService/AddNode 30 | # Remove node 3 31 | curl -X DELETE -H "Content-Type: application/json" -d '{"instanceId" : 1,"nodeId": 3}' http://localhost:6060/MasterService/RemoveNode 32 | # Add node 4 33 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId": 1, "nodeId": 4, "url": "http://127.0.0.1:7784", "role": 0, "status": 0}' http://localhost:6060/MasterService/AddNode 34 | # Remove node 4 35 | curl -X DELETE -H "Content-Type: application/json" -d '{"instanceId" : 1,"nodeId": 4}' http://localhost:6060/MasterService/RemoveNode 36 | 37 | # Show the cluster topology 38 | curl -X GET http://localhost:6061/ProxyService/topology 39 | 40 | # Read request 41 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.9], "k": 5, "indexType": "FLAT", "filter":{"fieldName":"int_field","value":49, "op":"="}}' http://localhost:6061/ProxyService/search 42 | 43 | # Write request 44 | curl -X POST -H "Content-Type: application/json" -d '{"id": 6, "vectors": [0.9], "int_field": 49, "indexType": "FLAT"}' http://localhost:6061/ProxyService/upsert 45 | 46 | # Force the read to go to the master 47 | curl -X POST -H "Content-Type: application/json" -d '{"vectors": [0.89], "k": 5, "indexType": "FLAT", "filter":{"fieldName":"int_field","value":49 ,"op":"="},"forceMaster" : true}' http://localhost:6061/ProxyService/search 48 | 49 | 50 | # Update the partition config: 51 | curl -X POST http://localhost:6060/MasterService/UpdatePartitionConfig -H "Content-Type: application/json" -d '{ 52 | "instanceId": 1, 53 | "partitionKey": "id", 54 | "numberOfPartitions": 2, 55 | "partitions": [ 56 | {"partitionId": 0, "nodeId": 1}, 57 | {"partitionId": 0, "nodeId": 2}, 58 | {"partitionId": 0, "nodeId": 3}, 59 | {"partitionId": 1, "nodeId": 4} 60 | ] 61 | }' 62 | 63 | # Get the partition config: 64 | 65 | curl -X POST -H "Content-Type: application/json" -d '{"instanceId" : 1}' http://localhost:6060/MasterService/GetPartitionConfig 66 | 67 | 68 | 69 | 
# Update the partition config (single partition; the JSON array must not end with a trailing comma): 70 | curl -X POST http://localhost:6060/MasterService/UpdatePartitionConfig -H "Content-Type: application/json" -d '{ 71 | "instanceId": 1, 72 | "partitionKey": "id", 73 | "numberOfPartitions": 1, 74 | "partitions": [ 75 | {"partitionId": 0, "nodeId": 1}, 76 | {"partitionId": 0, "nodeId": 2}, 77 | {"partitionId": 0, "nodeId": 3} 78 | ] 79 | }' -------------------------------------------------------------------------------- /test/test/gtest_test.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | 3 | namespace vectordb { 4 | // NOLINTNEXTLINE 5 | TEST(TestTest, SampleTest){ 6 | int res = 4; 7 | EXPECT_EQ(res, 4); 8 | } 9 | } -------------------------------------------------------------------------------- /third_party/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(third_party_lib INTERFACE) 2 | 3 | set(STATIC_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/installed/lib) 4 | 5 | set(LIB_LIST 6 | faiss 7 | openblas 8 | brpc 9 | gflags 10 | protobuf 11 | glog 12 | crypto 13 | leveldb 14 | ssl 15 | z 16 | rocksdb 17 | snappy 18 | lz4 19 | bz2 20 | roaring 21 | nuraft 22 | curl 23 | etcd-cpp-api 24 | ) 25 | 26 | foreach(lib_name IN LISTS LIB_LIST) 27 | set(lib_path ${STATIC_LIB_DIR}/lib${lib_name}) 28 | 29 | # 检查库文件类型 (.a 或 .so) 30 | if(EXISTS ${lib_path}.a) 31 | # 如果是静态库 32 | message(STATUS "Importing static library ${lib_name} ${lib_path}.a") 33 | add_library(${lib_name} STATIC IMPORTED) 34 | set_target_properties(${lib_name} PROPERTIES IMPORTED_LOCATION ${lib_path}.a) 35 | elseif(EXISTS ${lib_path}.so) 36 | # 如果是共享库 37 | message(STATUS "Importing shared library ${lib_name} ${lib_path}.so") 38 | add_library(${lib_name} SHARED IMPORTED) 39 | set_target_properties(${lib_name} PROPERTIES IMPORTED_LOCATION ${lib_path}.so) 40 | else() 41 | message(WARNING "Library ${lib_name} not found in ${STATIC_LIB_DIR}") 42 | endif() 43 | 44 | # 将库链接到 
third_party_lib 45 | target_link_libraries(third_party_lib INTERFACE ${lib_name}) 46 | endforeach() 47 | 48 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/googletest-1.15.2) 49 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/backward-cpp-1.6) 50 | set(THIRD_PARTY_LIB ${THIRD_PARTY_LIB} third_party_lib PARENT_SCOPE) 51 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/proto) 52 | set(PROTO_LIB ${PROTO_LIB} PARENT_SCOPE) 53 | 54 | -------------------------------------------------------------------------------- /third_party/patches/brpc-1.11.0.patch: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index e29a1c3..7369f19 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -307,6 +307,7 @@ if(WITH_RDMA) 6 | list(APPEND DYNAMIC_LIB ${RDMA_LIB}) 7 | endif() 8 | 9 | +link_directories(${CMAKE_LIBRARY_PATH}) 10 | set(BRPC_PRIVATE_LIBS "-lgflags -lprotobuf -lleveldb -lprotoc -lssl -lcrypto -ldl -lz") 11 | 12 | if(WITH_GLOG) 13 | -------------------------------------------------------------------------------- /third_party/patches/faiss-1.9.0.patch: -------------------------------------------------------------------------------- 1 | diff --git a/faiss/CMakeLists.txt b/faiss/CMakeLists.txt 2 | index 2871d97..2cf23bc 100644 3 | --- a/faiss/CMakeLists.txt 4 | +++ b/faiss/CMakeLists.txt 5 | @@ -354,13 +354,16 @@ target_link_libraries(faiss_avx2 PRIVATE OpenMP::OpenMP_CXX) 6 | target_link_libraries(faiss_avx512 PRIVATE OpenMP::OpenMP_CXX) 7 | target_link_libraries(faiss_sve PRIVATE OpenMP::OpenMP_CXX) 8 | 9 | +option(TP_INSTALL_DIR "Third party install dir") 10 | +message("Third party install dir: ${TP_INSTALL_DIR}") 11 | + 12 | find_package(MKL) 13 | if(MKL_FOUND) 14 | target_link_libraries(faiss PRIVATE ${MKL_LIBRARIES}) 15 | target_link_libraries(faiss_avx2 PRIVATE ${MKL_LIBRARIES}) 16 | target_link_libraries(faiss_avx512 PRIVATE ${MKL_LIBRARIES}) 17 | else() 18 | - 
find_package(BLAS REQUIRED) 19 | + find_library(BLAS_LIBRARIES NAMES openblas PATHS ${TP_INSTALL_DIR}/lib) 20 | target_link_libraries(faiss PRIVATE ${BLAS_LIBRARIES}) 21 | target_link_libraries(faiss_avx2 PRIVATE ${BLAS_LIBRARIES}) 22 | target_link_libraries(faiss_avx512 PRIVATE ${BLAS_LIBRARIES}) 23 | -------------------------------------------------------------------------------- /third_party/proto/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB PROTO_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.proto") 2 | 3 | set(PROTO_CC_FILES) 4 | 5 | foreach(proto_file ${PROTO_SRC_FILES}) 6 | get_filename_component(proto_name ${proto_file} NAME_WE) 7 | set(pb_cc "${proto_name}.pb.cc") 8 | set(pb_h "${proto_name}.pb.h") 9 | 10 | list(APPEND PROTO_CC_FILES ${pb_cc}) 11 | endforeach() 12 | 13 | add_custom_target(GenerateProtoFiles ALL DEPENDS ${PROTO_CC_FILES}) 14 | 15 | add_library(proto_cc OBJECT ${PROTO_CC_FILES}) 16 | 17 | add_dependencies(proto_cc GenerateProtoFiles) 18 | 19 | set(PROTO_LIB ${PROTO_LIB} proto_cc PARENT_SCOPE) 20 | -------------------------------------------------------------------------------- /third_party/proto/http.proto: -------------------------------------------------------------------------------- 1 | syntax="proto3"; 2 | 3 | package nvm; 4 | 5 | option cc_generic_services = true; 6 | 7 | // H2 8 | message HttpRequest { }; 9 | message HttpResponse { }; 10 | 11 | service UserService { 12 | rpc search(HttpRequest) returns (HttpResponse); 13 | rpc insert(HttpRequest) returns (HttpResponse); 14 | rpc upsert(HttpRequest) returns (HttpResponse); 15 | rpc query(HttpRequest) returns (HttpResponse); 16 | }; 17 | 18 | service ProxyService { 19 | rpc upsert(HttpRequest) returns (HttpResponse); 20 | rpc search(HttpRequest) returns (HttpResponse); 21 | rpc topology(HttpRequest) returns (HttpResponse); 22 | }; 23 | 24 | service MasterService { 25 | rpc GetNodeInfo(HttpRequest) returns (HttpResponse); 26 | 
rpc AddNode(HttpRequest) returns (HttpResponse); 27 | rpc RemoveNode(HttpRequest) returns (HttpResponse); 28 | rpc GetInstance(HttpRequest) returns (HttpResponse); 29 | rpc GetPartitionConfig(HttpRequest) returns (HttpResponse); 30 | rpc UpdatePartitionConfig(HttpRequest) returns (HttpResponse); 31 | }; 32 | 33 | 34 | service AdminService { 35 | rpc snapshot(HttpRequest) returns (HttpResponse); 36 | rpc SetLeader(HttpRequest) returns (HttpResponse); 37 | rpc AddFollower(HttpRequest) returns (HttpResponse); 38 | rpc ListNode(HttpRequest) returns (HttpResponse); 39 | rpc GetNode(HttpRequest) returns (HttpResponse); 40 | }; 41 | 42 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(server) 2 | add_backward(vdb_server) -------------------------------------------------------------------------------- /tools/backtrace.cpp: -------------------------------------------------------------------------------- 1 | #include <cstdlib> 2 | #include <exception> 3 | #include "backward.hpp" 4 | 5 | class TerminateHandler { 6 | public: 7 | TerminateHandler() { 8 | std::set_terminate([]() { 9 | backward::StackTrace st; 10 | st.load_here(32); 11 | backward::Printer p; 12 | p.object = true; 13 | p.snippet = false; 14 | p.color_mode = backward::ColorMode::automatic; 15 | p.address = true; 16 | p.print(st, stderr); 17 | std::abort(); 18 | }); 19 | } 20 | }; 21 | 22 | TerminateHandler th; 23 | -------------------------------------------------------------------------------- /tools/server/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # set(VDB_SERVER_SOURCES vdb_server.cpp) 2 | # add_executable(vdb_server ${VDB_SERVER_SOURCES}) 3 | 4 | # target_link_libraries(vdb_server vectorDB) 5 | # set_target_properties(vdb_server PROPERTIES OUTPUT_NAME vdb_server) 6 | # 定义源文件和目标名称的映射 7 | set(EXECUTABLES 8 | 
vdb_server:vdb_server.cpp 9 | vdb_server_master:vdb_server_master.cpp 10 | vdb_server_proxy:vdb_server_proxy.cpp 11 | ) 12 | 13 | # 遍历每个目标,创建可执行文件并设置属性 14 | foreach(EXECUTABLE_CONFIG ${EXECUTABLES}) 15 | # 使用 ":" 分割目标名称和源文件 16 | string(REPLACE ":" ";" SPLIT_CONFIG ${EXECUTABLE_CONFIG}) 17 | list(GET SPLIT_CONFIG 0 EXEC_NAME) 18 | list(GET SPLIT_CONFIG 1 SOURCE_FILE) 19 | 20 | # 创建可执行文件 21 | add_executable(${EXEC_NAME} ${SOURCE_FILE}) 22 | 23 | # 链接到 vectorDB 库 24 | target_link_libraries(${EXEC_NAME} vectorDB) 25 | 26 | # 设置输出文件名 27 | set_target_properties(${EXEC_NAME} PROPERTIES OUTPUT_NAME ${EXEC_NAME}) 28 | endforeach() -------------------------------------------------------------------------------- /tools/server/vdb_server.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include "common/vector_cfg.h" 3 | #include "common/vector_init.h" 4 | #include "httpserver/http_server.h" 5 | #include "index/index_factory.h" 6 | #include "logger/logger.h" 7 | 8 | // NOLINTNEXTLINE 9 | auto main(int argc, char *argv[]) -> int { 10 | int node_id = 1; 11 | if (argc == 2) { 12 | node_id = std::atoi(argv[1]); // Convert argument to integer if provided 13 | std::cout << "The node_id you input is " << node_id << std::endl; 14 | } else { 15 | std::cout << "No number provided, using default value: " << node_id << std::endl; 16 | } 17 | vectordb::VdbServerInit(node_id); 18 | vectordb::global_logger->info("Global IndexFactory initialized"); 19 | 20 | // 创建并启动HTTP服务器 21 | 22 | vectordb::VectorDatabase vector_database(vectordb::Cfg::Instance().RocksDbPath(), 23 | vectordb::Cfg::Instance().WalPath()); 24 | vector_database.ReloadDatabase(); 25 | vectordb::global_logger->info("VectorDatabase initialized"); 26 | 27 | int raft_node_id = vectordb::Cfg::Instance().RaftNodeId(); 28 | std::string endpoint = vectordb::Cfg::Instance().RaftEndpoint(); 29 | int raft_port = vectordb::Cfg::Instance().RaftPort(); 30 | 31 | vectordb::RaftStuff 
raft_stuff(raft_node_id, endpoint, raft_port, &vector_database); 32 | vectordb::global_logger->info("RaftStuff object created with node_id: {}, endpoint: {}, port: {}", raft_node_id, endpoint, 33 | raft_port); // 添加调试日志 34 | 35 | vectordb::HttpServer server; 36 | server.Init(&vector_database, &raft_stuff); 37 | vectordb::global_logger->info("HttpServer created"); 38 | 39 | std::string server_addr = 40 | vectordb::Cfg::Instance().Address() + ":" + std::to_string(vectordb::Cfg::Instance().Port()); 41 | LOG(INFO) << "listen at:" << server_addr; 42 | 43 | brpc::ServerOptions options; 44 | if (server.Start(server_addr.c_str(), &options) != 0) { 45 | LOG(ERROR) << "Failed to start server"; 46 | return -1; 47 | } 48 | 49 | server.RunUntilAskedToQuit(); 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /tools/server/vdb_server_master.cpp: -------------------------------------------------------------------------------- 1 | #include "common/master_cfg.h" 2 | #include "common/vector_init.h" 3 | #include "httpserver/master_server.h" 4 | #include "index/index_factory.h" 5 | #include "logger/logger.h" 6 | 7 | auto main() -> int { 8 | vectordb::MasterServerInit(); 9 | 10 | vectordb::MasterServer server; 11 | server.Init(vectordb::MasterCfg::Instance().EtcdEndpoints()); 12 | vectordb::global_logger->info("MasterServer created"); 13 | 14 | std::string server_addr = 15 | vectordb::MasterCfg::Instance().Address() + ":" + std::to_string(vectordb::MasterCfg::Instance().Port()); 16 | LOG(INFO) << "listen at:" << server_addr; 17 | 18 | brpc::ServerOptions options; 19 | if (server.Start(server_addr.c_str(), &options) != 0) { 20 | LOG(ERROR) << "Failed to start server"; 21 | return -1; 22 | } 23 | 24 | server.RunUntilAskedToQuit(); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /tools/server/vdb_server_proxy.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | #include "common/proxy_cfg.h" 3 | #include "common/vector_init.h" 4 | #include "httpserver/proxy_server.h" 5 | #include "index/index_factory.h" 6 | #include "logger/logger.h" 7 | 8 | auto main() -> int { 9 | vectordb::ProxyServerInit(); 10 | 11 | vectordb::ProxyServer server; 12 | server.Init(vectordb::ProxyCfg::Instance().MasterHost(), vectordb::ProxyCfg::Instance().MasterPort(), 13 | vectordb::ProxyCfg::Instance().InstanceId(), vectordb::ProxyCfg::Instance().ReadPaths(), 14 | vectordb::ProxyCfg::Instance().WritePaths()); 15 | vectordb::global_logger->info("ProxyServer created"); 16 | 17 | std::string server_addr = 18 | vectordb::ProxyCfg::Instance().Address() + ":" + std::to_string(vectordb::ProxyCfg::Instance().Port()); 19 | LOG(INFO) << "listen at:" << server_addr; 20 | 21 | brpc::ServerOptions options; 22 | if (server.Start(server_addr.c_str(), &options) != 0) { 23 | LOG(ERROR) << "Failed to start server"; 24 | return -1; 25 | } 26 | 27 | server.RunUntilAskedToQuit(); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /vectordb_config: -------------------------------------------------------------------------------- 1 | { 2 | "CLUSTER_INFO" :[ 3 | { 4 | "RAFT":{ 5 | "NODE_ID":1, 6 | "ENDPOINT":"127.0.0.1:8081", 7 | "PORT":8081 8 | }, 9 | "ROCKS_DB_PATH" : "/home/zhouzj/vectordb1/storage", 10 | "WAL_PATH" : "/home/zhouzj/vectordb1/wal", 11 | "SNAP_PATH" : "/home/zhouzj/vectordb1/snap/", 12 | "ADDRESS" : "0.0.0.0", 13 | "PORT" : 7781 14 | }, 15 | { 16 | "RAFT":{ 17 | "NODE_ID":2, 18 | "ENDPOINT":"127.0.0.1:8082", 19 | "PORT":8082 20 | }, 21 | "ROCKS_DB_PATH" : "/home/zhouzj/vectordb2/storage", 22 | "WAL_PATH" : "/home/zhouzj/vectordb2/wal", 23 | "SNAP_PATH" : "/home/zhouzj/vectordb2/snap/", 24 | "ADDRESS" : "0.0.0.0", 25 | "PORT" : 7782 26 | 27 | }, 28 | { 29 | "RAFT":{ 30 | "NODE_ID":3, 31 | 
"ENDPOINT":"127.0.0.1:8083", 32 | "PORT":8083 33 | }, 34 | "ROCKS_DB_PATH" : "/home/zhouzj/vectordb3/storage", 35 | "WAL_PATH" : "/home/zhouzj/vectordb3/wal", 36 | "SNAP_PATH" : "/home/zhouzj/vectordb3/snap/", 37 | "ADDRESS" : "0.0.0.0", 38 | "PORT" : 7783 39 | 40 | }, 41 | { 42 | "RAFT":{ 43 | "NODE_ID":4, 44 | "ENDPOINT":"127.0.0.1:8084", 45 | "PORT":8084 46 | }, 47 | "ROCKS_DB_PATH" : "/home/zhouzj/vectordb4/storage", 48 | "WAL_PATH" : "/home/zhouzj/vectordb4/wal", 49 | "SNAP_PATH" : "/home/zhouzj/vectordb4/snap/", 50 | "ADDRESS" : "0.0.0.0", 51 | "PORT" : 7784 52 | 53 | } 54 | ], 55 | "LOG":{ 56 | "LOG_NAME" : "my_log", 57 | "LOG_LEVEL" : 1 58 | }, 59 | "TEST_ROCKS_DB_PATH" : "/home/zhouzj/test_vectordb/storage", 60 | "TEST_WAL_PATH" : "/home/zhouzj/test_vectordb/wal", 61 | "TEST_SNAP_PATH" : "/home/zhouzj/test_vectordb/snap/" 62 | 63 | } --------------------------------------------------------------------------------