├── .github └── workflows │ └── rust.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── config ├── log4rs.yaml └── test_server.yaml ├── log └── requests.log ├── scripts └── repeatdly_model_test.sh ├── src ├── apps │ ├── hnsw │ │ ├── coordinator │ │ │ ├── mod.rs │ │ │ └── tests.rs │ │ ├── measurements.rs │ │ ├── mod.rs │ │ └── partition │ │ │ ├── index.rs │ │ │ ├── mod.rs │ │ │ ├── schema.rs │ │ │ ├── search.rs │ │ │ ├── service.rs │ │ │ ├── tests.rs │ │ │ ├── types.rs │ │ │ └── utils.rs │ └── mod.rs ├── config │ └── mod.rs ├── graph │ ├── edge │ │ ├── bilateral.rs │ │ ├── directed.rs │ │ ├── hyper.rs │ │ ├── macros.rs │ │ ├── mod.rs │ │ └── undirectd.rs │ ├── fields.rs │ ├── id_list.rs │ ├── local │ │ └── mod.rs │ ├── mod.rs │ ├── partitioner │ │ ├── mod.rs │ │ ├── vector.rs │ │ └── vector_test.rs │ └── vertex │ │ └── mod.rs ├── job │ ├── logger.rs │ ├── mod.rs │ └── service.rs ├── main.rs ├── query │ ├── mod.rs │ └── symbols │ │ ├── crud │ │ ├── cell.rs │ │ ├── edge.rs │ │ ├── mod.rs │ │ └── vertex.rs │ │ └── mod.rs ├── server │ ├── general.rs │ ├── mod.rs │ ├── schema │ │ ├── mod.rs │ │ └── sm.rs │ └── traversal.rs ├── tests │ ├── graph │ │ └── mod.rs │ └── mod.rs ├── traversal │ ├── bfs │ │ ├── coordinator.rs │ │ ├── engine.rs │ │ ├── mod.rs │ │ └── task.rs │ ├── mod.rs │ └── navigation │ │ ├── apps.rs │ │ ├── coordinator.rs │ │ ├── engine.rs │ │ ├── job.rs │ │ ├── mod.rs │ │ ├── server.rs │ │ ├── task.rs │ │ └── worker.rs └── utils │ ├── bloom_filter.rs │ ├── file.rs │ ├── mod.rs │ ├── ring_buffer.rs │ └── transaction.rs └── tests ├── server.rs └── test.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "develop" ] 6 | pull_request: 7 | branches: [ "develop" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Build 20 
| run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose -- --test-threads=1 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Leiningen template 3 | pom.xml 4 | pom.xml.asc 5 | *jar 6 | /lib/ 7 | /classes/ 8 | /target/ 9 | /checkouts/ 10 | .lein-deps-sum 11 | .lein-repl-history 12 | .lein-plugins/ 13 | .lein-failures 14 | .nrepl-port 15 | ### Java template 16 | *.class 17 | 18 | # Mobile Tools for Java (J2ME) 19 | .mtj.tmp/ 20 | 21 | # Package Files # 22 | *.jar 23 | *.war 24 | *.ear 25 | 26 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 27 | hs_err_pid* 28 | ### OSX template 29 | .DS_Store 30 | .AppleDouble 31 | .LSOverride 32 | 33 | # Icon must end with two \r 34 | Icon 35 | 36 | # Thumbnails 37 | ._* 38 | 39 | # Files that might appear in the root of a volume 40 | .DocumentRevisions-V100 41 | .fseventsd 42 | .Spotlight-V100 43 | .TemporaryItems 44 | .Trashes 45 | .VolumeIcon.icns 46 | 47 | # Directories potentially created on remote AFP share 48 | .AppleDB 49 | .AppleDesktop 50 | Network Trash Folder 51 | Temporary Items 52 | .apdisk 53 | ### JetBrains template 54 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio 55 | 56 | *.iml 57 | 58 | ## Directory-based project format: 59 | .idea/ 60 | # if you remove the above rule, at least ignore the following: 61 | 62 | # User-specific stuff: 63 | # .idea/workspace.xml 64 | # .idea/tasks.xml 65 | # .idea/dictionaries 66 | 67 | # Sensitive or high-churn files: 68 | # .idea/dataSources.ids 69 | # .idea/dataSources.xml 70 | # .idea/sqlDataSources.xml 71 | # .idea/dynamic.xml 72 | # .idea/uiDesigner.xml 73 | 74 | # Gradle: 75 | # .idea/gradle.xml 76 | # .idea/libraries 77 | 78 | # Mongo Explorer plugin: 79 | # 
.idea/mongoSettings.xml 80 | 81 | ## File-based project format: 82 | *.ipr 83 | *.iws 84 | 85 | ## Plugin-specific files: 86 | 87 | # IntelliJ 88 | /out/ 89 | 90 | # mpeltonen/sbt-idea plugin 91 | .idea_modules/ 92 | 93 | # JIRA plugin 94 | atlassian-ide-plugin.xml 95 | 96 | # Crashlytics plugin (for Android Studio and IntelliJ) 97 | com_crashlytics_export_strings.xml 98 | crashlytics.properties 99 | crashlytics-build.properties 100 | 101 | #Testing configurations 102 | /configures 103 | 104 | #Testing data 105 | /data 106 | /wikidata* 107 | /computation 108 | .dat 109 | 110 | 111 | ### Rust template 112 | # Generated by Cargo 113 | 114 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 115 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 116 | Cargo.lock 117 | 118 | # These are backup files generated by rustfmt 119 | **/*.rs.bk 120 | 121 | .idea -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | rust: 4 | - nightly 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "morpheus" 3 | version = "0.1.0" 4 | authors = ["Hao Shi "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | neb = { git = "https://github.com/ShisoftResearch/Nebuchadnezzar", branch = "develop" } 9 | bifrost = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 10 | bifrost_plugins = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 11 | bifrost_hasher = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 12 | bifrost_proc_macro = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 13 | dovahkiin = { git = "https://github.com/ShisoftResearch/Dovahkiin.git", branch = "develop" 
} 14 | lightning-containers = { git = "ssh://git@192.168.10.134/shisoft-x/Lightning.git", branch = "develop" } 15 | lazy_static = "*" 16 | futures = "0.3" 17 | parking_lot = {version = "0.12", features = ["nightly"]} 18 | tokio = { version = "1.23.0", features = ["full"] } 19 | async-stream = "0.3" 20 | serde = "*" 21 | serde_derive = "*" 22 | log = "0.4" 23 | log4rs = "*" 24 | env_logger = "0.11" 25 | yaml-rust = "*" 26 | serde_yaml = "*" 27 | rand = "0.9" 28 | libc = "*" 29 | rayon = "1.10.0" 30 | num_cpus = "1.16.0" 31 | num-traits = "0.2.19" 32 | bincode = "1.3.3" 33 | async-std = "1" 34 | ahash = "0.8.11" 35 | rand_distr = "0.5.1" 36 | once_cell = "1.21.3" 37 | itertools = "0.14.0" 38 | ndarray = "0.16.1" 39 | -------------------------------------------------------------------------------- /config/log4rs.yaml: -------------------------------------------------------------------------------- 1 | refresh_rate: 30 seconds 2 | appenders: 3 | stdout: 4 | kind: console 5 | requests: 6 | kind: file 7 | path: "log/requests.log" 8 | encoder: 9 | pattern: "{d} - {m}{n}" 10 | root: 11 | level: Debug 12 | appenders: 13 | - stdout 14 | loggers: 15 | app::backend::db: 16 | level: info 17 | app::requests: 18 | level: info 19 | appenders: 20 | - requests 21 | additive: false -------------------------------------------------------------------------------- /config/test_server.yaml: -------------------------------------------------------------------------------- 1 | server_addr: 127.0.0.1:5400 2 | group_name: Morpheus 3 | meta_members: 4 | - 127.0.0.1:5400 5 | storage: 6 | chunk_count: 8 7 | memory_size: 8192 # 8GB 8 | backup_storage: null 9 | wal_storage: null 10 | services: 11 | - Cell 12 | - Transaction 13 | - RangedIndexer 14 | - Query 15 | index_enabled: true -------------------------------------------------------------------------------- /log/requests.log: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ShisoftResearch/Morpheus/9ea3151c9c138371b582777faa8f9c817e986d9d/log/requests.log -------------------------------------------------------------------------------- /scripts/repeatdly_model_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | while true 4 | do 5 | cargo test --color=always --package morpheus --bin morpheus relationship -- --nocapture 6 | done -------------------------------------------------------------------------------- /src/apps/hnsw/coordinator/tests.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::sync::Arc; 3 | use std::time::Instant; 4 | 5 | use bifrost::conshash::ConsistentHashing; 6 | use bifrost::rpc::cluster::client_by_server_id; 7 | use dovahkiin::data_map; 8 | use dovahkiin::types::{Id, Map, OwnedPrimArray, OwnedValue, Type}; 9 | use neb::client::AsyncClient; 10 | use neb::ram::cell::OwnedCell; 11 | use neb::ram::schema::{Field, Schema as NebSchema}; 12 | 13 | use crate::apps::hnsw::coordinator::{AsyncServiceClient, HNSWIndexService}; 14 | use crate::apps::hnsw::measurements::MetricEncoding; 15 | use crate::apps::hnsw::partition::schema::initialize_schemas; 16 | use crate::apps::hnsw::HNSWPartitionService; 17 | use crate::job::logger::JobLogger; 18 | use crate::server::MorpheusServer; 19 | use crate::tests::start_server; 20 | use crate::traversal::navigation::Distance; 21 | 22 | const CELL_SCHEMA_ID: u32 = hash_ident!("CELL"); 23 | const VECTOR: &str = "VECTOR"; 24 | const VECTOR_FIELD_ID: u64 = hash_ident!(VECTOR) as u64; 25 | 26 | /// Struct containing test data vector 27 | #[derive(Debug, Clone)] 28 | pub struct TestVector { 29 | pub cell_id: Id, // The ID of the associated cell 30 | pub vector: OwnedPrimArray, // Vector data 31 | } 32 | 33 | impl TestVector { 34 | pub fn new(cell_higher: u64, cell_lower: u64, vector: OwnedPrimArray) -> Self { 35 | Self { 
36 | cell_id: Id::new(cell_higher, cell_lower), 37 | vector, 38 | } 39 | } 40 | 41 | pub fn to_data_cell(&self) -> OwnedCell { 42 | let cell_id = self.cell_id; 43 | let map = data_map!(VECTOR: OwnedValue::PrimArray(self.vector.clone())); 44 | let cell = OwnedCell::new_with_id(CELL_SCHEMA_ID, &cell_id, OwnedValue::Map(map)); 45 | cell 46 | } 47 | } 48 | 49 | /// Test environment structure to hold all components needed for HNSW coordinator tests 50 | pub struct TestEnvironment { 51 | pub job_counter: RefCell, 52 | pub group_name: String, 53 | pub field_id: u64, 54 | pub schema_id: u32, 55 | pub server_port: u32, 56 | pub partition_id: u64, 57 | pub test_vectors: Vec, 58 | pub neb_client: Option>, 59 | pub conshash: Option>, 60 | pub job_logger: Option>, 61 | pub partition: Option>, 62 | pub coordinator: Option>, 63 | pub coordinator_client: Option>, 64 | pub morpheus: Option>, 65 | } 66 | 67 | impl TestEnvironment { 68 | /// Create a new test environment with test vectors 69 | pub fn new( 70 | server_port: u32, 71 | group_name: &str, 72 | partition_id: u64, 73 | field_id: u64, 74 | schema_id: u32, 75 | ) -> Self { 76 | let _ = env_logger::try_init(); 77 | Self { 78 | job_counter: RefCell::new(1), 79 | group_name: group_name.to_string(), 80 | server_port, 81 | partition_id, 82 | field_id, 83 | schema_id, 84 | test_vectors: Vec::new(), 85 | neb_client: None, 86 | conshash: None, 87 | job_logger: None, 88 | partition: None, 89 | coordinator: None, 90 | coordinator_client: None, 91 | morpheus: None, 92 | } 93 | } 94 | 95 | /// Add test vectors to the environment 96 | pub fn with_test_vectors(mut self, vectors: Vec>) -> Self { 97 | // Create test vectors with IDs 98 | let test_vectors = vectors 99 | .into_iter() 100 | .enumerate() 101 | .map(|(i, vector)| { 102 | let cell_higher = self.partition_id; // Most share the same partition ID 103 | let cell_lower = 1000 + i as u64; // Unique cell IDs 104 | TestVector::new(cell_higher, cell_lower, OwnedPrimArray::F32(vector)) 105 
| }) 106 | .collect(); 107 | self.test_vectors = test_vectors; 108 | self 109 | } 110 | 111 | /// Initialize the JobLogger 112 | pub fn with_job_logger(mut self) -> Self { 113 | self.job_logger = Some(Arc::new(JobLogger::with_capacity(16))); 114 | self 115 | } 116 | 117 | pub async fn initialize_server(mut self) -> Self { 118 | let server = start_server(self.server_port, &self.group_name) 119 | .await 120 | .unwrap(); 121 | let server_id = server.neb_server.server_id; 122 | self.morpheus = Some(server.clone()); 123 | self.conshash = Some(server.neb_server.consh.clone()); 124 | self.partition = Some(server.init_hnsw_index_partition_service().await.unwrap()); 125 | self.coordinator = Some(server.init_hnsw_index_service().await.unwrap()); 126 | self.coordinator_client = Some( 127 | client_by_server_id(&server.neb_server.consh, server_id) 128 | .await 129 | .unwrap(), 130 | ); 131 | self.neb_client = Some(server.neb_client.clone()); 132 | self 133 | } 134 | 135 | fn cell_schema(&self) -> NebSchema { 136 | NebSchema::new_with_id( 137 | CELL_SCHEMA_ID, 138 | "text_schema", 139 | None, 140 | Field::new_schema(vec![Field::new_unindexed_array(VECTOR, Type::F32)]), 141 | false, 142 | false, 143 | ) 144 | } 145 | 146 | pub async fn initialize_schemas(self) -> Result { 147 | let neb_client = self.neb_client.as_ref().unwrap(); 148 | let morph = self.morpheus.as_ref().unwrap(); 149 | neb_client 150 | .new_schema_with_id(self.cell_schema()) 151 | .await 152 | .map_err(|e| format!("Failed to create cell schema: {:?}", e))? 
153 | .map_err(|e| format!("Failed to create cell schema: {:?}", e))?; 154 | initialize_schemas(morph).await.unwrap(); 155 | Ok(self) 156 | } 157 | 158 | /// Initialize all cells 159 | pub async fn initialize_cells(self) -> Result { 160 | if self.neb_client.is_none() { 161 | return Err("NebClient not initialized".to_string()); 162 | } 163 | 164 | let neb_client = self.neb_client.as_ref().unwrap(); 165 | 166 | // Write data cells 167 | for test_vector in &self.test_vectors { 168 | let data_cell = test_vector.to_data_cell(); 169 | if let Err(e) = neb_client.write_cell(data_cell).await { 170 | return Err(format!("Failed to write data cell: {:?}", e)); 171 | } 172 | } 173 | Ok(self) 174 | } 175 | 176 | pub async fn create_index(&self) -> Result<(), String> { 177 | if self.morpheus.is_none() { 178 | return Err("Morpheus not initialized".to_string()); 179 | } 180 | let client = self.coordinator_client.as_ref().unwrap(); 181 | client 182 | .new_index("test_index".to_string(), CELL_SCHEMA_ID, VECTOR_FIELD_ID) 183 | .await 184 | .map_err(|e| format!("Failed to create index: {:?}", e))? 185 | .map_err(|e| format!("Failed to create index: {:?}", e))?; 186 | Ok(()) 187 | } 188 | 189 | /// Index cells using the coordinator 190 | pub async fn index_cells(&self) -> Result<(), String> { 191 | let ef_construction = 256; 192 | if self.morpheus.is_none() { 193 | return Err("Morpheus not initialized".to_string()); 194 | } 195 | let client = self.coordinator_client.as_ref().unwrap(); 196 | 197 | for test_vector in &self.test_vectors { 198 | client 199 | .new_cell( 200 | CELL_SCHEMA_ID, 201 | VECTOR_FIELD_ID, 202 | test_vector.cell_id, 203 | ef_construction, 204 | MetricEncoding::L2, 205 | ) 206 | .await 207 | .map_err(|e| format!("Failed to index cell: {:?}", e))? 
208 | .map_err(|e| format!("Failed to index cell: {:?}", e))?; 209 | } 210 | Ok(()) 211 | } 212 | 213 | /// Run a search query using the coordinator 214 | pub async fn top_k(&self, query: Vec, k: u32) -> Result, String> { 215 | let start = Instant::now(); 216 | if self.coordinator.is_none() { 217 | return Err("Coordinator not initialized".to_string()); 218 | } 219 | let client = self.coordinator_client.as_ref().unwrap(); 220 | let results = client 221 | .query_top_k( 222 | CELL_SCHEMA_ID, 223 | VECTOR_FIELD_ID, 224 | OwnedPrimArray::F32(query), 225 | k, 226 | 32, // ef 227 | 10, // max_iter 228 | MetricEncoding::L2, 229 | ) 230 | .await 231 | .map_err(|e| format!("Failed to search: {:?}", e))? 232 | .map_err(|e| format!("Failed to search: {:?}", e))?; 233 | let duration = start.elapsed(); 234 | println!("Search took {:?} microseconds", duration.as_micros()); 235 | Ok(results) 236 | } 237 | } 238 | 239 | // Simple helper for creating test vectors 240 | pub fn create_test_vectors(count: usize, dim: usize) -> Vec> { 241 | let mut vectors = Vec::with_capacity(count); 242 | for i in 0..count { 243 | let vector = (0..dim).map(|j| (i * dim + j) as Distance * 0.1).collect(); 244 | vectors.push(vector); 245 | } 246 | vectors 247 | } 248 | 249 | #[cfg(test)] 250 | mod tests { 251 | use super::*; 252 | 253 | #[tokio::test] 254 | async fn test_create_index() { 255 | let env = TestEnvironment::new( 256 | 5000, 257 | "test_create_index", 258 | 1, 259 | VECTOR_FIELD_ID, 260 | CELL_SCHEMA_ID, 261 | ) 262 | .with_job_logger() 263 | .initialize_server() 264 | .await 265 | .initialize_schemas() 266 | .await 267 | .unwrap(); 268 | env.create_index().await.unwrap(); 269 | } 270 | 271 | #[tokio::test] 272 | async fn test_index_one_cell() { 273 | let env = TestEnvironment::new( 274 | 5001, 275 | "test_index_one_cell", 276 | 1, 277 | VECTOR_FIELD_ID, 278 | CELL_SCHEMA_ID, 279 | ) 280 | .with_test_vectors(vec![vec![1.0, 2.0, 3.0]]) 281 | .with_job_logger() 282 | .initialize_server() 283 | 
.await 284 | .initialize_schemas() 285 | .await 286 | .unwrap() 287 | .initialize_cells() 288 | .await 289 | .unwrap(); 290 | env.create_index().await.unwrap(); 291 | env.index_cells().await.unwrap(); 292 | } 293 | 294 | #[tokio::test] 295 | async fn test_index_and_query_one_cell() { 296 | let env = TestEnvironment::new( 297 | 5002, 298 | "test_index_and_query_one_cell", 299 | 1, 300 | VECTOR_FIELD_ID, 301 | CELL_SCHEMA_ID, 302 | ) 303 | .with_test_vectors(vec![vec![1.0, 2.0, 3.0]]) 304 | .with_job_logger() 305 | .initialize_server() 306 | .await 307 | .initialize_schemas() 308 | .await 309 | .unwrap() 310 | .initialize_cells() 311 | .await 312 | .unwrap(); 313 | env.create_index().await.unwrap(); 314 | env.index_cells().await.unwrap(); 315 | 316 | let query = vec![1.0, 2.0, 3.0]; 317 | let results = env.top_k(query, 1).await.unwrap(); 318 | assert_eq!(results.len(), 1); 319 | assert_eq!(results[0].0.lower, 1000); // First cell 320 | assert_eq!(results[0].1, 0.0); // Exact match 321 | } 322 | 323 | #[tokio::test] 324 | async fn test_index_and_query_multiple_cells() { 325 | let env = TestEnvironment::new( 326 | 5003, 327 | "test_index_and_query_multiple_cells", 328 | 1, 329 | VECTOR_FIELD_ID, 330 | CELL_SCHEMA_ID, 331 | ) 332 | .with_test_vectors(vec![ 333 | vec![1.0, 2.0, 3.0], 334 | vec![4.0, 5.0, 6.0], 335 | vec![7.0, 8.0, 9.0], 336 | ]) 337 | .with_job_logger() 338 | .initialize_server() 339 | .await 340 | .initialize_schemas() 341 | .await 342 | .unwrap() 343 | .initialize_cells() 344 | .await 345 | .unwrap(); 346 | env.create_index().await.unwrap(); 347 | env.index_cells().await.unwrap(); 348 | 349 | let query = vec![4.0, 5.0, 6.0]; 350 | let results = env.top_k(query, 2).await.unwrap(); 351 | assert_eq!(results.len(), 2); 352 | assert_eq!(results[0].0.lower, 1001); // Second cell 353 | assert_eq!(results[0].1, 0.0); // Exact match 354 | assert!(results[1].0.lower == 1000 || results[1].0.lower == 1002); // First or third cell 355 | } 356 | 357 | #[tokio::test] 
358 | async fn test_index_and_query_many_cells() { 359 | // Create 50 test vectors 360 | let mut test_vectors = Vec::with_capacity(50); 361 | for i in 0..50 { 362 | test_vectors.push(vec![i as f32, (i + 1) as f32, (i + 2) as f32]); 363 | } 364 | 365 | let env = TestEnvironment::new( 366 | 5004, 367 | "test_index_and_query_many_cells", 368 | 1, 369 | VECTOR_FIELD_ID, 370 | CELL_SCHEMA_ID, 371 | ) 372 | .with_test_vectors(test_vectors) 373 | .with_job_logger() 374 | .initialize_server() 375 | .await 376 | .initialize_schemas() 377 | .await 378 | .unwrap() 379 | .initialize_cells() 380 | .await 381 | .unwrap(); 382 | env.create_index().await.unwrap(); 383 | env.index_cells().await.unwrap(); 384 | 385 | let query = vec![10.0, 11.0, 12.0]; 386 | let k = 5; 387 | let results = env.top_k(query, k).await.unwrap(); 388 | assert_eq!(results.len(), k as usize); 389 | assert_eq!(results[0].0.lower, 1010); // Tenth cell 390 | assert_eq!(results[0].1, 0.0); // Exact match 391 | assert!(results[1].0.lower == 1009 || results[1].0.lower == 1011); // Neighboring cells 392 | } 393 | 394 | #[tokio::test] 395 | async fn test_error_cases() { 396 | let env = 397 | TestEnvironment::new(5005, "test_error_cases", 1, VECTOR_FIELD_ID, CELL_SCHEMA_ID) 398 | .with_job_logger() 399 | .initialize_server() 400 | .await 401 | .initialize_schemas() 402 | .await 403 | .unwrap(); 404 | 405 | // Test querying before creating index 406 | let query = vec![1.0, 2.0, 3.0]; 407 | let result = env.top_k(query, 1).await; 408 | assert!(result.is_err()); 409 | 410 | // Test querying with invalid schema 411 | env.create_index().await.unwrap(); 412 | let result = env 413 | .coordinator_client 414 | .as_ref() 415 | .unwrap() 416 | .query_top_k( 417 | 999, // Invalid schema 418 | VECTOR_FIELD_ID, 419 | OwnedPrimArray::F32(vec![1.0, 2.0, 3.0]), 420 | 1, 421 | 32, 422 | 10, 423 | MetricEncoding::L2, 424 | ) 425 | .await; 426 | match result { 427 | Ok(Ok(_)) => panic!("Expected error"), 428 | Ok(Err(e)) => { 429 | 
println!("Should be schema error: {:?}", e); 430 | } 431 | Err(e) => panic!("RPC Error: {:?}", e), 432 | } 433 | } 434 | } 435 | -------------------------------------------------------------------------------- /src/apps/hnsw/measurements.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::OwnedPrimArray; 2 | use ndarray::ArrayView1; 3 | use num_traits::{FromPrimitive, Num, One, Zero}; 4 | use std::fmt::Debug; 5 | 6 | use crate::traversal::navigation::Distance; 7 | 8 | pub use neb::index::vector::MetricEncoding; 9 | 10 | /// A trait for computing the distance between two vectors of generic numbers. 11 | /// The type parameter N represents the numeric type. 12 | pub trait HnswMetric: Clone + Copy { 13 | fn distance(&self, a: &[N], b: &[N]) -> N 14 | where 15 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug + 'static; 16 | } 17 | 18 | /// L2Metric computes the Euclidean distance (sum of squared differences). 19 | #[derive(Debug, Clone, Copy)] 20 | pub struct L2Metric; 21 | 22 | impl HnswMetric for L2Metric { 23 | fn distance(&self, a: &[N], b: &[N]) -> N 24 | where 25 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug, 26 | { 27 | // Create ndarray views 28 | let a_arr = ArrayView1::from(a); 29 | let b_arr = ArrayView1::from(b); 30 | // Compute (a - b) elementwise, square each difference and sum them. 31 | (&a_arr - &b_arr).mapv(|x| x * x).sum() 32 | } 33 | } 34 | 35 | /// CosineMetric computes cosine distance as 1 - (dot(a, b) / (||a|| * ||b||)). 36 | /// Best used with floating-point types. 37 | #[derive(Debug, Clone, Copy)] 38 | pub struct CosineMetric; 39 | 40 | impl HnswMetric for CosineMetric { 41 | fn distance(&self, a: &[N], b: &[N]) -> N 42 | where 43 | // For cosine, the implementation assumes the ability to convert from f64. 
44 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug + 'static, 45 | { 46 | let one = N::one(); 47 | let a_arr = ArrayView1::from(a); 48 | let b_arr = ArrayView1::from(b); 49 | // Compute the dot product using ndarray's dot method. 50 | let dot = a_arr.dot(&b_arr); 51 | // For the norms, compute the squared sum and then “raise” it to the 0.5 power. 52 | let exponent = N::from_f64(0.5).unwrap(); 53 | let a_norm_squared: N = a_arr.mapv(|x| x * x).sum(); 54 | let b_norm_squared: N = b_arr.mapv(|x| x * x).sum(); 55 | let a_norm = a_norm_squared.pow(exponent); 56 | let b_norm = b_norm_squared.pow(exponent); 57 | if a_norm == N::zero() || b_norm == N::zero() { 58 | return one; 59 | } 60 | one - (dot / (a_norm * b_norm)) 61 | } 62 | } 63 | 64 | /// ManhattanMetric computes the Manhattan distance (sum of absolute differences). 65 | #[derive(Debug, Clone, Copy)] 66 | pub struct ManhattanMetric; 67 | 68 | impl HnswMetric for ManhattanMetric { 69 | fn distance(&self, a: &[N], b: &[N]) -> N 70 | where 71 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug, 72 | { 73 | let a_arr = ArrayView1::from(a); 74 | let b_arr = ArrayView1::from(b); 75 | // Zip the arrays and sum the absolute differences. 76 | a_arr 77 | .iter() 78 | .zip(b_arr.iter()) 79 | .fold(N::zero(), |acc, (&x, &y)| { 80 | let diff = if x < y { y - x } else { x - y }; 81 | acc + diff 82 | }) 83 | } 84 | } 85 | 86 | /// ChebyshevMetric computes the Chebyshev distance (maximum absolute difference). 
87 | #[derive(Debug, Clone, Copy)] 88 | pub struct ChebyshevMetric; 89 | 90 | impl HnswMetric for ChebyshevMetric { 91 | fn distance(&self, a: &[N], b: &[N]) -> N 92 | where 93 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug, 94 | { 95 | let a_arr = ArrayView1::from(a); 96 | let b_arr = ArrayView1::from(b); 97 | a_arr 98 | .iter() 99 | .zip(b_arr.iter()) 100 | .fold(N::zero(), |max, (&x, &y)| { 101 | let diff = if x < y { y - x } else { x - y }; 102 | if diff > max { 103 | diff 104 | } else { 105 | max 106 | } 107 | }) 108 | } 109 | } 110 | 111 | /// An enum to encapsulate the various metric types. 112 | #[derive(Debug, Clone, Copy)] 113 | pub enum Metric { 114 | L2(L2Metric), 115 | Cosine(CosineMetric), 116 | Manhattan(ManhattanMetric), 117 | Chebyshev(ChebyshevMetric), 118 | } 119 | 120 | impl Metric { 121 | pub fn to_encoding(self) -> MetricEncoding { 122 | match self { 123 | Metric::L2(_metric) => MetricEncoding::L2, 124 | Metric::Cosine(_metric) => MetricEncoding::Cosine, 125 | Metric::Manhattan(_metric) => MetricEncoding::Manhattan, 126 | Metric::Chebyshev(_metric) => MetricEncoding::Chebyshev, 127 | } 128 | } 129 | 130 | pub fn from_encoding(metric: MetricEncoding) -> Self { 131 | match metric { 132 | MetricEncoding::L2 => Metric::L2(L2Metric), 133 | MetricEncoding::Cosine => Metric::Cosine(CosineMetric), 134 | MetricEncoding::Manhattan => Metric::Manhattan(ManhattanMetric), 135 | MetricEncoding::Chebyshev => Metric::Chebyshev(ChebyshevMetric), 136 | } 137 | } 138 | } 139 | 140 | /// A trait to support exponentiation. For floats we use powf and for integers their native pow. 
/// Exponentiation support for the numeric element types used by the metrics.
///
/// Floating-point types delegate to `powf`; integer types delegate to their
/// inherent `pow`, casting non-`u32` exponents to `u32` first. The `RHS`
/// parameter defaults to `Self`, so `Pow for T` means `T.pow(T)`.
pub trait Pow<RHS = Self> {
    type Output;
    fn pow(self, rhs: RHS) -> Self::Output;
}

impl Pow for f32 {
    type Output = f32;
    fn pow(self, rhs: f32) -> f32 {
        self.powf(rhs)
    }
}

impl Pow for f64 {
    type Output = f64;
    fn pow(self, rhs: f64) -> f64 {
        self.powf(rhs)
    }
}

impl Pow for u32 {
    type Output = u32;
    fn pow(self, rhs: u32) -> u32 {
        // Fully-qualified so this unambiguously hits the inherent `u32::pow`,
        // never this trait method.
        u32::pow(self, rhs)
    }
}

impl Pow<u32> for i32 {
    type Output = i32;
    fn pow(self, rhs: u32) -> i32 {
        i32::pow(self, rhs)
    }
}

impl Pow<u32> for u64 {
    type Output = u64;
    fn pow(self, rhs: u32) -> u64 {
        u64::pow(self, rhs)
    }
}

impl Pow for u8 {
    type Output = u8;
    fn pow(self, rhs: u8) -> u8 {
        u8::pow(self, rhs as u32)
    }
}

impl Pow for u16 {
    type Output = u16;
    fn pow(self, rhs: u16) -> u16 {
        u16::pow(self, rhs as u32)
    }
}

impl Pow for i8 {
    type Output = i8;
    fn pow(self, rhs: i8) -> i8 {
        // NOTE(review): a negative exponent wraps through the `as u32` cast;
        // callers are expected to pass non-negative exponents.
        i8::pow(self, rhs as u32)
    }
}

impl Pow for i16 {
    type Output = i16;
    fn pow(self, rhs: i16) -> i16 {
        i16::pow(self, rhs as u32)
    }
}

impl Pow for i32 {
    type Output = i32;
    fn pow(self, rhs: i32) -> i32 {
        i32::pow(self, rhs as u32)
    }
}

impl Pow for i64 {
    type Output = i64;
    fn pow(self, rhs: i64) -> i64 {
        i64::pow(self, rhs as u32)
    }
}

/// Errors produced when computing a distance between two stored vectors.
#[derive(Debug, Clone, Copy)]
pub enum DistanceError {
    // The two vectors have different element types.
    ValueTypeMismatch,
    // The element type has no distance implementation.
    ArrayTypeNotSupported,
}
metric.distance(va.as_slice(), vb); 241 | return Some(m as Distance); 242 | } 243 | (OwnedPrimArray::F64(va), OwnedPrimArray::F64(vb)) => { 244 | let m = metric.distance(va.as_slice(), vb); 245 | return Some(m as Distance); 246 | } 247 | // // int 248 | (OwnedPrimArray::I8(va), OwnedPrimArray::I8(vb)) => { 249 | let m = metric.distance(va.as_slice(), vb); 250 | return Some(m as Distance); 251 | } 252 | (OwnedPrimArray::I16(va), OwnedPrimArray::I16(vb)) => { 253 | let m = metric.distance(va.as_slice(), vb); 254 | return Some(m as Distance); 255 | } 256 | (OwnedPrimArray::I32(va), OwnedPrimArray::I32(vb)) => { 257 | let m = metric.distance(va.as_slice(), vb); 258 | return Some(m as Distance); 259 | } 260 | (OwnedPrimArray::I64(va), OwnedPrimArray::I64(vb)) => { 261 | let m = metric.distance(va.as_slice(), vb); 262 | return Some(m as Distance); 263 | } 264 | // uint 265 | (OwnedPrimArray::U8(va), OwnedPrimArray::U8(vb)) => { 266 | let m = metric.distance(va.as_slice(), vb); 267 | return Some(m as Distance); 268 | } 269 | (OwnedPrimArray::U16(va), OwnedPrimArray::U16(vb)) => { 270 | let m = metric.distance(va.as_slice(), vb); 271 | return Some(m as Distance); 272 | } 273 | (OwnedPrimArray::U32(va), OwnedPrimArray::U32(vb)) => { 274 | let m = metric.distance(va.as_slice(), vb); 275 | return Some(m as Distance); 276 | } 277 | _ => None, 278 | } 279 | } 280 | 281 | #[cfg(test)] 282 | mod tests { 283 | use super::*; 284 | use std::f64::consts::PI; 285 | 286 | #[test] 287 | fn test_l2_metric() { 288 | let metric = L2Metric; 289 | 290 | // Test with simple vectors 291 | let a = vec![1.0, 2.0, 3.0]; 292 | let b = vec![4.0, 5.0, 6.0]; 293 | let distance = metric.distance(&a, &b); 294 | assert_eq!(distance, 27.0); // (3^2 + 3^2 + 3^2) = 27 295 | 296 | // Test with zero distance 297 | let c = vec![1.0, 2.0, 3.0]; 298 | let distance = metric.distance(&a, &c); 299 | assert_eq!(distance, 0.0); 300 | 301 | // Test with negative values 302 | let d = vec![-1.0, -2.0, -3.0]; 303 | let 
distance = metric.distance(&a, &d); 304 | assert_eq!(distance, 56.0); // (2^2 + 4^2 + 6^2) = 56 305 | } 306 | 307 | #[test] 308 | fn test_cosine_metric() { 309 | let metric = CosineMetric; 310 | 311 | // Test with orthogonal vectors (should be 1.0 - 0.0 = 1.0) 312 | let a = vec![1.0f64, 0.0]; 313 | let b = vec![0.0, 1.0]; 314 | let distance = metric.distance(&a, &b); 315 | assert!((distance - 1.0).abs() < 1e-10); 316 | 317 | // Test with parallel vectors (should be 1.0 - 1.0 = 0.0) 318 | let c = vec![2.0f64, 4.0]; 319 | let d = vec![1.0, 2.0]; 320 | let distance = metric.distance(&c, &d); 321 | assert!(distance.abs() < 1e-10); 322 | } 323 | 324 | #[test] 325 | fn test_manhattan_metric() { 326 | let metric = ManhattanMetric; 327 | 328 | // Test with simple vectors 329 | let a = vec![1.0, 2.0, 3.0]; 330 | let b = vec![4.0, 5.0, 6.0]; 331 | let distance = metric.distance(&a, &b); 332 | assert_eq!(distance, 9.0); // |4-1| + |5-2| + |6-3| = 3 + 3 + 3 = 9 333 | 334 | // Test with zero distance 335 | let c = vec![1.0, 2.0, 3.0]; 336 | let distance = metric.distance(&a, &c); 337 | assert_eq!(distance, 0.0); 338 | 339 | // Test with negative values 340 | let d = vec![-1.0, -2.0, -3.0]; 341 | let distance = metric.distance(&a, &d); 342 | assert_eq!(distance, 12.0); // |1-(-1)| + |2-(-2)| + |3-(-3)| = 2 + 4 + 6 = 12 343 | } 344 | 345 | #[test] 346 | fn test_chebyshev_metric() { 347 | let metric = ChebyshevMetric; 348 | 349 | // Test with simple vectors 350 | let a = vec![1.0, 2.0, 3.0]; 351 | let b = vec![4.0, 5.0, 6.0]; 352 | let distance = metric.distance(&a, &b); 353 | assert_eq!(distance, 3.0); // max(|4-1|, |5-2|, |6-3|) = max(3, 3, 3) = 3 354 | 355 | // Test with zero distance 356 | let c = vec![1.0, 2.0, 3.0]; 357 | let distance = metric.distance(&a, &c); 358 | assert_eq!(distance, 0.0); 359 | 360 | // Test with negative values 361 | let d = vec![-1.0, -2.0, -3.0]; 362 | let distance = metric.distance(&a, &d); 363 | assert_eq!(distance, 6.0); // max(|1-(-1)|, |2-(-2)|, 
|3-(-3)|) = max(2, 4, 6) = 6 364 | } 365 | 366 | #[test] 367 | fn test_metric_encoding() { 368 | // Test conversion from Metric to MetricEncoding and back 369 | let l2 = Metric::L2(L2Metric); 370 | let encoding = Metric::to_encoding(l2); 371 | assert!(matches!(encoding, MetricEncoding::L2)); 372 | let metric = Metric::from_encoding(encoding); 373 | assert!(matches!(metric, Metric::L2(_))); 374 | 375 | let cosine = Metric::Cosine(CosineMetric); 376 | let encoding = cosine.to_encoding(); 377 | assert!(matches!(encoding, MetricEncoding::Cosine)); 378 | let metric = Metric::from_encoding(encoding); 379 | assert!(matches!(metric, Metric::Cosine(_))); 380 | 381 | let manhattan = Metric::Manhattan(ManhattanMetric); 382 | let encoding = manhattan.to_encoding(); 383 | assert!(matches!(encoding, MetricEncoding::Manhattan)); 384 | let metric = Metric::from_encoding(encoding); 385 | assert!(matches!(metric, Metric::Manhattan(_))); 386 | 387 | let chebyshev = Metric::Chebyshev(ChebyshevMetric); 388 | let encoding = chebyshev.to_encoding(); 389 | assert!(matches!(encoding, MetricEncoding::Chebyshev)); 390 | let metric = Metric::from_encoding(encoding); 391 | assert!(matches!(metric, Metric::Chebyshev(_))); 392 | } 393 | } 394 | -------------------------------------------------------------------------------- /src/apps/hnsw/mod.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use bifrost::rpc::cluster::client_by_server_id; 4 | use bifrost_hasher::hash_str; 5 | use dovahkiin::types::Id; 6 | use futures::{ 7 | future::{BoxFuture, FutureExt}, 8 | TryFutureExt, 9 | }; 10 | use neb::{ 11 | index::{ 12 | builder::IndexError, 13 | vector::{set_vector_index_core, VectorIndexerCore}, 14 | }, 15 | 16 | server::NebServer, 17 | }; 18 | 19 | 20 | use crate::{ 21 | apps::hnsw::coordinator::AsyncServiceClient, 22 | server::MorpheusServer, 23 | }; 24 | 25 | 26 | 27 | pub mod coordinator; 28 | pub mod measurements; 29 | pub mod 
partition; 30 | 31 | use partition::schema::*; 32 | 33 | // Re-export 34 | pub use partition::HNSWPartitionService; 35 | 36 | pub const DEFAULT_EF_CONSTRUCTION: u64 = 256; 37 | 38 | pub async fn initial_app(morph: &Arc) -> Result<(), String> { 39 | initialize_schemas(morph).await?; 40 | return Ok(()); 41 | } 42 | 43 | pub fn meta_index_id(schema: u32, field_id: u64) -> Id { 44 | let lo_str = format!("HNSW_IDX-{}", schema); 45 | let hi_str = format!("HNSW_IDX-{}", field_id); 46 | let lo_hash = hash_str(&lo_str); 47 | let hi_hash = hash_str(&hi_str); 48 | Id::new(lo_hash, hi_hash) 49 | } 50 | 51 | pub struct VectorIndexer { 52 | ef_construction: u64, 53 | coordinator: Arc, 54 | } 55 | 56 | impl VectorIndexerCore for VectorIndexer { 57 | fn insert( 58 | &self, 59 | cell_id: &Id, 60 | schema_id: u32, 61 | field_id: u64, 62 | metric_encoding: neb::index::vector::MetricEncoding, 63 | ) -> BoxFuture> { 64 | self.coordinator 65 | .new_cell( 66 | schema_id, 67 | field_id, 68 | *cell_id, 69 | self.ef_construction, 70 | metric_encoding, 71 | ) 72 | .map_err(IndexError::RPCError) 73 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 74 | .boxed() 75 | } 76 | 77 | fn remove( 78 | &self, 79 | cell_id: &Id, 80 | _schema_id: u32, 81 | _field_id: u64, 82 | ) -> BoxFuture> { 83 | self.coordinator 84 | .del_cell(*cell_id) 85 | .map_err(IndexError::RPCError) 86 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 87 | .boxed() 88 | } 89 | 90 | fn new_index(&self, schema_id: u32, field_id: u64) -> BoxFuture> { 91 | let index_name = format!("HNSW-{}-{}", schema_id, field_id); 92 | self.coordinator 93 | .new_index(index_name, schema_id, field_id) 94 | .map_err(IndexError::RPCError) 95 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 96 | .boxed() 97 | } 98 | 99 | fn delete_index( 100 | &self, 101 | schema_id: u32, 102 | field_id: u64, 103 | ) -> BoxFuture> { 104 | self.coordinator 105 | .delete_index(schema_id, field_id) 106 | .map_err(IndexError::RPCError) 
107 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 108 | .boxed() 109 | } 110 | } 111 | 112 | impl VectorIndexer { 113 | pub async fn new(neb_server: &Arc) -> Self { 114 | let server_id = neb_server.server_id; 115 | let conshash = &neb_server.consh; 116 | let coordinator_client = client_by_server_id(conshash, server_id).await.unwrap(); 117 | Self { 118 | coordinator: coordinator_client, 119 | ef_construction: DEFAULT_EF_CONSTRUCTION, 120 | } 121 | } 122 | 123 | pub fn set_core(self) { 124 | set_vector_index_core(self); 125 | } 126 | 127 | pub async fn new_and_set_core(neb_server: &Arc) { 128 | let indexer = Self::new(neb_server).await; 129 | indexer.set_core(); 130 | } 131 | 132 | pub fn set_ef_construction(&mut self, ef_construction: u64) { 133 | self.ef_construction = ef_construction; 134 | } 135 | } 136 | 137 | unsafe impl Send for VectorIndexer {} 138 | unsafe impl Sync for VectorIndexer {} 139 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/index.rs: -------------------------------------------------------------------------------- 1 | use bifrost::conshash::ConsistentHashing; 2 | use dovahkiin::{ 3 | data_map, 4 | types::{Id, OwnedPrimArray, OwnedValue}, 5 | }; 6 | use neb::ram::cell::OwnedCell; 7 | use std::{collections::BTreeMap, sync::Arc}; 8 | 9 | use super::{schema::*, HNSWIndexError}; 10 | use crate::{ 11 | apps::hnsw::measurements::HnswMetric, 12 | graph::GraphEngine, 13 | job::{ 14 | logger::{append_job_log, JobLogLevel, JobLogger}, 15 | JobId, 16 | }, 17 | }; 18 | use dovahkiin::types::Map; 19 | pub struct HNSWIndex { 20 | pub id: Id, 21 | pub name: String, 22 | pub max_level: u32, 23 | pub schema: u32, 24 | pub field: u64, 25 | // Partition local top level vertices 26 | // Key is the LSH gray code and value is the vertex id 27 | pub top_level_vertices: BTreeMap, 28 | } 29 | 30 | impl HNSWIndex { 31 | pub fn new(id: Id, name: String, schema: u32, field: u64) -> Self { 32 | Self { 
33 | id, 34 | name, 35 | max_level: 0, 36 | schema, 37 | field, 38 | top_level_vertices: BTreeMap::new(), 39 | } 40 | } 41 | pub fn to_cell(&self) -> OwnedCell { 42 | let top_level = self 43 | .top_level_vertices 44 | .values() 45 | .cloned() 46 | .collect::>(); 47 | let top_level_array = OwnedPrimArray::Id(top_level); 48 | let map = data_map! { 49 | NAME: self.name.clone(), 50 | NUM_LEVELS: self.max_level, 51 | SCHEMA: self.schema, 52 | FIELD: self.field, 53 | TOP_LEVEL_VERTICES: OwnedValue::PrimArray(top_level_array) 54 | }; 55 | OwnedCell::new_with_id(INDEX_SCHEMA_ID, &self.id, OwnedValue::Map(map)) 56 | } 57 | 58 | // Helper that converts this index's metric encoding into an HnswMetric implementation (currently always returns L2) 59 | pub fn metric_to_hnsw_metric(&self) -> impl HnswMetric { 60 | // Default to L2 metric if not specified 61 | super::super::measurements::L2Metric 62 | } 63 | 64 | pub async fn save( 65 | &self, 66 | engine: &Arc, 67 | server_id: u64, 68 | conshash: &Arc, 69 | job_logger: &Arc, 70 | job_id: JobId, 71 | ) -> Result<(), HNSWIndexError> { 72 | let cell_id = self.id; 73 | let txn_res = engine 74 | .neb_client() 75 | .transaction(|txn| { 76 | let cell_id = cell_id; 77 | let server_id = server_id; 78 | let conshash = conshash.clone(); 79 | let top_level_vertices = self 80 | .top_level_vertices 81 | .values() 82 | .cloned() 83 | .collect::>(); 84 | debug_assert!(!cell_id.is_unit_id()); 85 | async move { 86 | let mut cell = match txn.read(cell_id).await { 87 | Ok(Some(cell)) => cell, 88 | Ok(None) => { 89 | append_job_log( 90 | job_logger, 91 | job_id, 92 | JobLogLevel::Error, 93 | format!("Index not found: {:?}", cell_id), 94 | ); 95 | return Ok(Err(HNSWIndexError::IndexNotFound)); 96 | } 97 | Err(e) => return Ok(Err(HNSWIndexError::TxnError(e))), 98 | }; 99 | let top_level = match cell[NUM_LEVELS].u32() { 100 | Some(top_level) => top_level, 101 | None => { 102 | return Ok(Err(HNSWIndexError::IndexFormatError(format!( 103 | "Top level not found: {:?}", 104 |
cell[NUM_LEVELS] 105 | )))) 106 | } 107 | }; 108 | let mut new_vertices = Vec::new(); 109 | if *top_level == self.max_level { 110 | // Need to preserve the existing top level vertices 111 | let vertices = match cell[TOP_LEVEL_VERTICES].prim_array() { 112 | Some(OwnedPrimArray::Id(vertices)) => vertices, 113 | Some(_) => { 114 | return Ok(Err(HNSWIndexError::IndexFormatError(format!( 115 | "Top level vertices is not a list of ids: {:?}", 116 | cell[TOP_LEVEL_VERTICES] 117 | )))) 118 | } 119 | None => { 120 | return Ok(Err(HNSWIndexError::IndexFormatError( 121 | "Top level vertices not found".to_string(), 122 | ))) 123 | } 124 | }; 125 | 126 | new_vertices = vertices 127 | .iter() 128 | .filter_map(|id| { 129 | let id_server_id = conshash.get_server_id(id.higher); 130 | if id_server_id == Some(server_id) { 131 | None 132 | } else { 133 | Some(*id) 134 | } 135 | }) 136 | .collect::>(); 137 | } 138 | new_vertices.extend(top_level_vertices); 139 | cell[TOP_LEVEL_VERTICES] = 140 | OwnedValue::PrimArray(OwnedPrimArray::Id(new_vertices)); 141 | cell[NUM_LEVELS] = OwnedValue::U32(self.max_level); 142 | match txn.update(cell).await { 143 | Ok(_) => Ok(Ok(())), 144 | Err(e) => Ok(Err(HNSWIndexError::TxnError(e))), 145 | } 146 | } 147 | }) 148 | .await; 149 | match txn_res { 150 | Ok(Ok(_)) => Ok(()), 151 | Ok(Err(e)) => Err(e), 152 | Err(e) => Err(HNSWIndexError::TxnError(e)), 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod index; 2 | pub mod schema; 3 | pub mod search; 4 | pub mod service; 5 | #[cfg(test)] 6 | pub mod tests; 7 | pub mod types; 8 | pub mod utils; 9 | 10 | pub use service::HNSWPartitionService; 11 | pub use types::{HNSWIndexError, SearchMetadata}; 12 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/schema.rs: 
-------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use bifrost_hasher::hash_str; 4 | use dovahkiin::data_map; 5 | use dovahkiin::types::{Map, OwnedValue, Type}; 6 | use neb::ram::cell::{Cell, OwnedCell}; 7 | use neb::ram::schema::{Field, IndexType, Schema as NebSchema}; 8 | 9 | use crate::graph::edge; 10 | use crate::graph::vertex::Vertex; 11 | use crate::server::{schema::*, MorpheusServer}; 12 | 13 | use super::index::HNSWIndex; 14 | use super::search::MAX_LEVEL_CAP; 15 | 16 | pub const NAME: &str = "NAME"; 17 | pub const ENTRY: &str = "ENTRY"; 18 | pub const NUM_LEVELS: &str = "NUM_LEVELS"; 19 | pub const CELL: &str = "CELL"; 20 | pub const SCHEMA: &str = "SCHEMA"; 21 | pub const FIELD: &str = "FIELD"; 22 | pub const LSH: &str = "LSH"; 23 | pub const TOP_LEVEL_VERTICES: &str = "TOP_LEVEL_VERTICES"; 24 | pub const INDEX_SCHEMA_NAME: &str = "_hnsw-index"; 25 | 26 | pub const INDEX_SCHEMA_ID: u32 = hash_ident!(INDEX_SCHEMA_NAME) as u32; 27 | pub const CELL_FIELD_ID: u64 = hash_ident!(CELL) as u64; 28 | 29 | fn group_index_name() -> String { 30 | INDEX_SCHEMA_NAME.to_string() 31 | } 32 | 33 | pub fn group_index_schema_id() -> u32 { 34 | INDEX_SCHEMA_ID 35 | } 36 | 37 | fn edge_schema_name(name: &str, level: usize) -> String { 38 | format!("_hnsw-edge-{}@{}", name, level) 39 | } 40 | 41 | // One cell for each (schema, field) pair 42 | pub fn index_schema() -> NebSchema { 43 | NebSchema::new_with_id( 44 | INDEX_SCHEMA_ID, 45 | &group_index_name(), 46 | None, 47 | Field::new_schema(vec![ 48 | Field::new_unindexed(NAME, Type::String), 49 | Field::new_unindexed(NUM_LEVELS, Type::U32), 50 | Field::new_unindexed(SCHEMA, Type::U32), 51 | Field::new_unindexed(FIELD, Type::U64), 52 | Field::new_unindexed_array(TOP_LEVEL_VERTICES, Type::Id), 53 | ]), 54 | false, 55 | true, // Enable enumeration 56 | ) 57 | } 58 | 59 | pub fn index_cell(index: &HNSWIndex) -> OwnedCell { 60 | let id = index.id; 61 | 
OwnedCell::new_with_id( 62 | INDEX_SCHEMA_ID, 63 | &id, 64 | OwnedValue::Map(data_map!( 65 | NAME: index.name.clone(), 66 | NUM_LEVELS: index.max_level as u32, 67 | SCHEMA: index.schema as u32, 68 | FIELD: index.field, 69 | TOP_LEVEL_VERTICES: index.top_level_vertices.values().cloned().collect::>() 70 | )), 71 | ) 72 | } 73 | 74 | fn edge_schema(name: &str, level: usize) -> MorpheusSchema { 75 | let edge_schema_name = edge_schema_name(name, level); 76 | let edge_schema_id = hash_str(&edge_schema_name) as u32; 77 | MorpheusSchema::new_edge_with_id( 78 | edge_schema_id, 79 | &edge_schema_name, 80 | None, 81 | &EMPTY_FIELDS, 82 | edge::EdgeAttributes::new(edge::EdgeType::Undirected, false), 83 | false, 84 | ) 85 | } 86 | 87 | pub const HNSW_VERTEX_SCHEMA_ID: u32 = hash_ident!("_hnsw_vertex"); 88 | 89 | pub fn hnsw_vertex_schema() -> MorpheusSchema { 90 | MorpheusSchema::new_vertex_with_id( 91 | HNSW_VERTEX_SCHEMA_ID, 92 | "_hnsw_vertex", 93 | None, 94 | &vec![Field::new_indexed(CELL, Type::Id, vec![IndexType::Hashed])], 95 | false, 96 | ) 97 | } 98 | 99 | pub fn cell_vertex(cell: &C) -> Vertex { 100 | Vertex::new(HNSW_VERTEX_SCHEMA_ID, data_map!(CELL: cell.id())) 101 | } 102 | 103 | const MAX_LEVEL: usize = MAX_LEVEL_CAP; 104 | const DEFAULT_LEVEL_NAME: &str = "DEFAULT_LEVEL"; 105 | 106 | lazy_static! 
{ 107 | pub static ref LEVEL_SCHEMAS: Vec = { 108 | (0..MAX_LEVEL) 109 | .map(|level| edge_schema(DEFAULT_LEVEL_NAME, level)) 110 | .collect() 111 | }; 112 | } 113 | 114 | pub async fn initialize_schemas(morph: &Arc) -> Result<(), String> { 115 | let neb = morph.neb_client.as_ref(); 116 | for level_schema in LEVEL_SCHEMAS.iter() { 117 | let schema_id = level_schema.id; 118 | if neb.schema_client.get(&schema_id).await.unwrap().is_none() { 119 | morph 120 | .schema_container 121 | .new_schema(level_schema.clone()) 122 | .await 123 | .map_err(|e| { 124 | format!( 125 | "Failed to create level {} schema: {:?}", 126 | level_schema.name, e 127 | ) 128 | })?; 129 | } 130 | } 131 | if neb 132 | .schema_client 133 | .get(&HNSW_VERTEX_SCHEMA_ID) 134 | .await 135 | .unwrap() 136 | .is_none() 137 | { 138 | morph 139 | .schema_container 140 | .new_schema(hnsw_vertex_schema()) 141 | .await 142 | .map_err(|e| format!("Failed to create HNSW vertex schema: {:?}", e))?; 143 | } 144 | if neb 145 | .schema_client 146 | .get(&INDEX_SCHEMA_ID) 147 | .await 148 | .unwrap() 149 | .is_none() 150 | { 151 | neb.new_schema_with_id(index_schema()) 152 | .await 153 | .map_err(|e| format!("Failed to create index schema: {:?}", e))? 
154 | .map_err(|e| format!("Failed to create index schema: {:?}", e))?; 155 | } 156 | Ok(()) 157 | } 158 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/service.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::{BTreeMap, HashSet}, 3 | future, mem, 4 | sync::Arc, 5 | }; 6 | 7 | use bifrost::{ 8 | conshash::ConsistentHashing, dispatch_rpc_service_functions, raft::client::RaftClient, service, 9 | service_with_id, 10 | }; 11 | use dovahkiin::types::{Id, OwnedPrimArray}; 12 | use futures::{future::BoxFuture, FutureExt}; 13 | use itertools::Itertools; 14 | use lightning::map::{Map, PtrHashMap}; 15 | use neb::ram::chunk::Chunks; 16 | 17 | use crate::{ 18 | apps::hnsw::measurements::{Metric, MetricEncoding}, 19 | graph::GraphEngine, 20 | job::{ 21 | logger::{append_job_log, JobLogLevel, JobLogger}, 22 | JobId, 23 | }, 24 | traversal::navigation::Distance, 25 | }; 26 | 27 | use super::{ 28 | index::HNSWIndex, 29 | schema::LEVEL_SCHEMAS, 30 | search::{HnswOnlinePartition, PartitionSearch}, 31 | types::OrderedFloat, 32 | }; 33 | 34 | pub use super::service::service::HNSWPartitionService; 35 | 36 | pub const HNSW_PARTITION_SERVICE_ID: u64 = hash_ident!("HNSW_PARTITION_SERVICE"); 37 | 38 | pub mod service { 39 | use crate::{apps::hnsw::measurements::MetricEncoding, job::JobId}; 40 | use bifrost::service; 41 | 42 | use super::*; 43 | 44 | service! 
{ 45 | // For a partition to do anything with a vector, first start a job 46 | rpc new_job(job_id: JobId, schema: u32, field_id: u64, query: OwnedPrimArray, k: u64, ef: u64, ef_construction: u64, metric: MetricEncoding) -> Result<(), String>; 47 | rpc end_job(job_id: JobId) -> bool; 48 | 49 | // Then use multiple iterations (or not) to do the actual search 50 | // It returns remote frontiers to be processed by other partitions 51 | rpc next_iteration(readonly: bool, job_id: JobId) -> Result, String>; 52 | rpc set_frontiers(job_id: JobId, frontiers: Vec) -> Result<(), String>; 53 | 54 | // After a search is completed, it can call the following functions 55 | rpc index_cell(job_id: JobId, cell_id: Id) -> Result<(), String>; 56 | rpc top_k(job_id: JobId, k: u32) -> Result, String>; 57 | rpc top(job_id: JobId) -> Result<(Id, Distance), String>; 58 | 59 | rpc get_last_result(job_id: JobId) -> Result, String>; 60 | 61 | // Misc 62 | rpc new_index(id: Id, name: String, schema: u32, field_id: u64) -> Result<(), String>; 63 | } 64 | 65 | pub struct HNSWPartitionService { 66 | pub partition: HnswOnlinePartition, 67 | pub job_logger: Arc, 68 | pub jobs: PtrHashMap>>, 69 | } 70 | 71 | dispatch_rpc_service_functions!(HNSWPartitionService); 72 | service_with_id!(HNSWPartitionService, HNSW_PARTITION_SERVICE_ID); 73 | } 74 | 75 | impl service::HNSWPartitionService { 76 | pub async fn new( 77 | server_id: u64, 78 | conshash: &Arc, 79 | raft_client: &Arc, 80 | chunks: &Arc, 81 | engine: &Arc, 82 | job_logger: &Arc, 83 | ) -> Result { 84 | let partition = HnswOnlinePartition::new(server_id, conshash, raft_client, chunks, engine) 85 | .await 86 | .map_err(|e| format!("Failed to create HnswOnlinePartition: {:?}", e))?; 87 | Ok(Self { 88 | partition, 89 | job_logger: job_logger.clone(), 90 | jobs: PtrHashMap::with_capacity(32), 91 | }) 92 | } 93 | } 94 | 95 | impl service::Service for service::HNSWPartitionService { 96 | fn new_job<'a>( 97 | &'a self, 98 | job_id: JobId, 99 | schema: u32,
100 | field_id: u64, 101 | query: OwnedPrimArray, 102 | k: u64, 103 | ef: u64, 104 | ef_construction: u64, 105 | metric: MetricEncoding, 106 | ) -> BoxFuture<'a, Result<(), String>> { 107 | async move { 108 | self.partition 109 | .new_search( 110 | schema, 111 | field_id, 112 | query, 113 | k as usize, 114 | ef as usize, 115 | ef_construction as usize, 116 | metric, 117 | ) 118 | .await 119 | .map(|job| { 120 | let job = Arc::new(async_std::sync::Mutex::new(job)); 121 | self.jobs.insert(job_id, job); 122 | }) 123 | .map_err(|e| format!("Failed to create job: {:?}", e)) 124 | } 125 | .boxed() 126 | } 127 | 128 | fn end_job<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, bool> { 129 | let result = self.jobs.remove(&job_id).is_some(); 130 | append_job_log( 131 | &self.job_logger, 132 | job_id, 133 | JobLogLevel::Info, 134 | format!("Job ended {} for {:?}", result, job_id), 135 | ); 136 | future::ready(result).boxed() 137 | } 138 | 139 | fn get_last_result<'a>( 140 | &'a self, 141 | job_id: JobId, 142 | ) -> BoxFuture<'a, Result, String>> { 143 | async move { 144 | let job = match self 145 | .jobs 146 | .get(&job_id) 147 | .ok_or(format!("Job {:?} not found", job_id)) 148 | { 149 | Ok(job) => job, 150 | Err(msg) => { 151 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 152 | return Err(msg); 153 | } 154 | }; 155 | let job = job.lock().await; 156 | Ok(job.history.to_vec()) 157 | } 158 | .boxed() 159 | } 160 | 161 | fn set_frontiers<'a>( 162 | &'a self, 163 | job_id: JobId, 164 | frontiers: Vec, 165 | ) -> BoxFuture<'a, Result<(), String>> { 166 | async move { 167 | let job = match self 168 | .jobs 169 | .get(&job_id) 170 | .ok_or(format!("Job {:?} not found", job_id)) 171 | { 172 | Ok(job) => job, 173 | Err(msg) => { 174 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 175 | return Err(msg); 176 | } 177 | }; 178 | let mut job = job.lock().await; 179 | job.frontier = frontiers; 180 | Ok(()) 181 | } 182 | .boxed() 183 
| } 184 | 185 | fn next_iteration<'a>( 186 | &'a self, 187 | readonly: bool, 188 | job_id: JobId, 189 | ) -> BoxFuture<'a, Result, String>> { 190 | async move { 191 | let job = match self 192 | .jobs 193 | .get(&job_id) 194 | .ok_or(format!("Job {:?} not found", job_id)) 195 | { 196 | Ok(job) => job, 197 | Err(msg) => { 198 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 199 | return Err(msg); 200 | } 201 | }; 202 | let mut job = job.lock().await; 203 | let metric = Metric::from_encoding(job.metric); 204 | let logger = &self.job_logger; 205 | let result = match metric { 206 | Metric::L2(metric) => { 207 | self.partition 208 | .next_iteration(&mut job, metric, readonly, logger, job_id) 209 | .await 210 | } 211 | Metric::Cosine(metric) => { 212 | self.partition 213 | .next_iteration(&mut job, metric, readonly, logger, job_id) 214 | .await 215 | } 216 | Metric::Manhattan(metric) => { 217 | self.partition 218 | .next_iteration(&mut job, metric, readonly, logger, job_id) 219 | .await 220 | } 221 | Metric::Chebyshev(metric) => { 222 | self.partition 223 | .next_iteration(&mut job, metric, readonly, logger, job_id) 224 | .await 225 | } 226 | }; 227 | match result { 228 | Ok(_) => { 229 | job.last_distance = job.metadata.last_distance; 230 | job.metadata.last_distance = Distance::INFINITY; 231 | Ok(mem::take(&mut job.metadata.remote_frontiers)) 232 | } 233 | Err(e) => { 234 | let msg = format!("Failed to run next iteration: {:?}", e); 235 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 236 | Err(msg) 237 | } 238 | } 239 | } 240 | .boxed() 241 | } 242 | 243 | fn index_cell<'a>(&'a self, job_id: JobId, cell_id: Id) -> BoxFuture<'a, Result<(), String>> { 244 | async move { 245 | self.new_vertex(job_id, cell_id) 246 | .await 247 | .map_err(|e| format!("Failed to index cell: {:?}", e)) 248 | .map(|_| ()) 249 | } 250 | .boxed() 251 | } 252 | 253 | fn top_k<'a>( 254 | &'a self, 255 | job_id: JobId, 256 | k: u32, 257 | ) -> 
BoxFuture<'a, Result, String>> { 258 | async move { 259 | let results = self.bfs_search(job_id, 0, k).await?; 260 | Ok(results 261 | .into_iter() 262 | .map(|(_vid, id, distance)| (id, distance)) 263 | .collect()) 264 | } 265 | .boxed() 266 | } 267 | 268 | fn top<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, Result<(Id, Distance), String>> { 269 | async move { 270 | let search = match self 271 | .jobs 272 | .get(&job_id) 273 | .ok_or(format!("Job {:?} not found", job_id)) 274 | { 275 | Ok(job) => job, 276 | Err(msg) => { 277 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 278 | return Err(msg); 279 | } 280 | }; 281 | let search = search.lock().await; 282 | let result = search 283 | .history 284 | .iter() 285 | .sorted_by_key(|(_, distance)| OrderedFloat(*distance)) 286 | .next() 287 | .ok_or("No results found")?; 288 | Ok(result.clone()) 289 | } 290 | .boxed() 291 | } 292 | 293 | fn new_index<'a>( 294 | &'a self, 295 | id: Id, 296 | name: String, 297 | schema: u32, 298 | field_id: u64, 299 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 300 | let index = HNSWIndex::new(id, name, schema, field_id); 301 | self.partition.indices.insert( 302 | (schema, field_id), 303 | Arc::new(async_std::sync::RwLock::new(index)), 304 | ); 305 | future::ready(Ok(())).boxed() 306 | } 307 | } 308 | 309 | // Partition coordination 310 | impl HNSWPartitionService { 311 | async fn bfs_search<'a>( 312 | &'a self, 313 | job_id: JobId, 314 | level: u32, 315 | k: u32, 316 | ) -> Result, String> { 317 | let job = match self 318 | .jobs 319 | .get(&job_id) 320 | .ok_or(format!("Job {:?} not found", job_id)) 321 | { 322 | Ok(job) => job, 323 | Err(msg) => { 324 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 325 | return Err(msg); 326 | } 327 | }; 328 | let job = job.lock().await; 329 | let level_schema = LEVEL_SCHEMAS[level as usize].id; 330 | let field_id = job.field_id; 331 | let metric = job.metric; 332 | append_job_log( 
333 | &self.job_logger, 334 | job_id, 335 | JobLogLevel::Info, 336 | format!("Starting BFS search for job {:?}", job_id), 337 | ); 338 | match self 339 | .partition 340 | .search_top_k( 341 | level_schema, 342 | field_id, 343 | k as usize, 344 | metric, 345 | &job.metadata, 346 | &self.job_logger, 347 | job_id, 348 | ) 349 | .await 350 | { 351 | Ok(results) => { 352 | append_job_log( 353 | &self.job_logger, 354 | job_id, 355 | JobLogLevel::Info, 356 | format!( 357 | "BFS search completed for job {:?} with {} results", 358 | job_id, 359 | results.len() 360 | ), 361 | ); 362 | Ok(results) 363 | } 364 | Err(e) => { 365 | let msg = format!("BFS search failed: {:?}", e); 366 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 367 | Err(msg) 368 | } 369 | } 370 | } 371 | 372 | async fn new_vertex<'a>(&'a self, job_id: JobId, cell_id: Id) -> Result { 373 | let job = match self 374 | .jobs 375 | .get(&job_id) 376 | .ok_or(format!("Job {:?} not found", job_id)) 377 | { 378 | Ok(job) => job, 379 | Err(msg) => { 380 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 381 | return Err(msg); 382 | } 383 | }; 384 | let job = job.lock().await; 385 | debug_assert_eq!( 386 | self.partition.conshash.get_server_id(cell_id.higher), 387 | Some(self.partition.server_id) 388 | ); 389 | let metric = Metric::from_encoding(job.metric); 390 | let max_level = job.max_level; 391 | let result = match metric { 392 | Metric::L2(metric) => self 393 | .partition 394 | .new_vertex( 395 | job_id, 396 | cell_id, 397 | job.schema, 398 | job.field_id, 399 | metric, 400 | &job.metadata, 401 | max_level, 402 | &self.job_logger, 403 | ) 404 | .await 405 | .map_err(|e| format!("Failed to create vertex: {:?}", e)), 406 | Metric::Cosine(metric) => self 407 | .partition 408 | .new_vertex( 409 | job_id, 410 | cell_id, 411 | job.schema, 412 | job.field_id, 413 | metric, 414 | &job.metadata, 415 | max_level, 416 | &self.job_logger, 417 | ) 418 | .await 419 | 
.map_err(|e| format!("Failed to create vertex: {:?}", e)), 420 | Metric::Chebyshev(metric) => self 421 | .partition 422 | .new_vertex( 423 | job_id, 424 | cell_id, 425 | job.schema, 426 | job.field_id, 427 | metric, 428 | &job.metadata, 429 | max_level, 430 | &self.job_logger, 431 | ) 432 | .await 433 | .map_err(|e| format!("Failed to create vertex: {:?}", e)), 434 | Metric::Manhattan(metric) => self 435 | .partition 436 | .new_vertex( 437 | job_id, 438 | cell_id, 439 | job.schema, 440 | job.field_id, 441 | metric, 442 | &job.metadata, 443 | max_level, 444 | &self.job_logger, 445 | ) 446 | .await 447 | .map_err(|e| format!("Failed to create vertex: {:?}", e)), 448 | }; 449 | result 450 | } 451 | } 452 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/types.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::collections::{BTreeMap, HashSet}; 3 | use std::rc::Rc; 4 | 5 | use ahash::HashMap; 6 | use bifrost::rpc::RPCError; 7 | use dovahkiin::types::{Id, OwnedPrimArray}; 8 | use neb::ram::cell::ReadError; 9 | use neb::{client::transaction::TxnError, ram::cell::WriteError}; 10 | 11 | use crate::graph::{edge::EdgeError, NeighbourhoodError}; 12 | use crate::traversal::navigation::Distance; 13 | use crate::utils::ring_buffer::RingBuffer; 14 | 15 | /// A wrapper for f32 that implements Ord by using partial_cmp and treating NaN values as equal 16 | #[derive(Debug, Copy, Clone, PartialEq, PartialOrd)] 17 | pub struct OrderedFloat(pub f32); 18 | 19 | impl Eq for OrderedFloat {} 20 | 21 | impl Ord for OrderedFloat { 22 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 23 | self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal) 24 | } 25 | } 26 | 27 | pub struct ReverseOrd(pub T); 28 | impl Ord for ReverseOrd { 29 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 30 | other.0.cmp(&self.0) // Reversed comparison 31 | } 32 | } 33 | impl 
PartialOrd for ReverseOrd { 34 | fn partial_cmp(&self, other: &Self) -> Option { 35 | Some(self.cmp(other)) 36 | } 37 | } 38 | impl PartialEq for ReverseOrd { 39 | fn eq(&self, other: &Self) -> bool { 40 | self.0 == other.0 41 | } 42 | } 43 | impl Eq for ReverseOrd {} 44 | 45 | #[derive(Debug)] 46 | pub enum HNSWIndexError { 47 | RPCError(RPCError), 48 | IndexFormatError(String), 49 | IndexNotFound, 50 | EdgeError(EdgeError), 51 | TxnError(TxnError), 52 | NeighbourhoodError(NeighbourhoodError), 53 | WriteError(WriteError), 54 | ReadError(ReadError), 55 | UpdateError(TxnError), 56 | } 57 | 58 | pub struct SearchMetadata { 59 | pub level: usize, 60 | pub visited: HashSet, 61 | pub history: RingBuffer<(Id, Distance)>, 62 | pub level_entries: ahash::HashMap, 63 | pub remote_frontiers: HashSet, 64 | pub last_distance: Distance, 65 | pub vertex_distance_cache: RefCell>, 66 | pub vertex_cache: RefCell>, 67 | pub vertex_vector_cache: RefCell>>, 68 | pub ef: usize, // Extension factor for search, controls exploration vs. 
exploitation 69 | pub ef_construction: usize, // Extension factor for construction, controls how many neighbors to consider when building the graph 70 | } 71 | 72 | unsafe impl Sync for SearchMetadata {} 73 | unsafe impl Send for SearchMetadata {} 74 | 75 | impl SearchMetadata { 76 | pub fn set_query_vector(&self, query: OwnedPrimArray) { 77 | self.vertex_vector_cache 78 | .borrow_mut() 79 | .insert(Id::unit_id(), Rc::new(query)); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::apps::hnsw::partition::types::HNSWIndexError; 2 | use crate::graph::partitioner::vector::FromVectorPartitioner; 3 | use crate::traversal::navigation::Distance; 4 | use crate::utils::ring_buffer::RingBuffer; 5 | use dovahkiin::types::Id; 6 | use neb::ram::cell::OwnedCell; 7 | use rand::Rng; 8 | 9 | /// Generate a level based on geometric distribution with P = 1/M 10 | /// This matches standard HNSW behavior. 
11 | pub fn generate_random_level(prob: f64, max_level_cap: usize) -> usize { 12 | let mut rng = rand::rng(); 13 | let mut level = 0; 14 | while rng.random::() < prob && level < max_level_cap { 15 | level += 1; 16 | } 17 | level 18 | } 19 | 20 | // Helper function to determine the maximum connections for a level 21 | pub fn get_max_connections_for_level(level: usize) -> usize { 22 | if level == 0 { 23 | // More connections at the lowest level 24 | 16 25 | } else { 26 | // Fewer connections at higher levels 27 | 8 28 | } 29 | } 30 | 31 | pub fn merge_history( 32 | history: &RingBuffer<(Id, Distance)>, 33 | new_history: &RingBuffer<(Id, Distance)>, 34 | ) -> RingBuffer<(Id, Distance)> { 35 | history.merge_sorted(new_history, |a, b| b.1.partial_cmp(&a.1).unwrap()) 36 | } 37 | 38 | pub fn data_partition_key(field_id: u64, cell: &OwnedCell) -> Result { 39 | let partitioner = FromVectorPartitioner::new(field_id); 40 | let key = partitioner.vertex_partition_key(cell); 41 | if key == 0 { 42 | Err(HNSWIndexError::IndexFormatError(format!( 43 | "Index {:?} has no partition key", 44 | cell 45 | ))) 46 | } else { 47 | Ok(key) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/apps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod hnsw; 2 | -------------------------------------------------------------------------------- /src/config/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{server::MorphesOptions, utils::file::slurp}; 2 | use serde_yaml; 3 | 4 | pub fn options_from_file<'a>(file: &'a str) -> MorphesOptions { 5 | let file_text = slurp(file).unwrap(); 6 | let mut config: MorphesOptions = serde_yaml::from_str(&file_text).unwrap(); 7 | config.storage.memory_size *= 1024 * 1024; 8 | return config; 9 | } 10 | -------------------------------------------------------------------------------- /src/graph/edge/bilateral.rs: 
-------------------------------------------------------------------------------- 1 | use dovahkiin::types::{Map, OwnedMap, OwnedValue}; 2 | use futures::future::BoxFuture; 3 | use futures::{FutureExt, TryFutureExt}; 4 | use neb::client::transaction::{Transaction, TxnError}; 5 | use neb::ram::cell::OwnedCell; 6 | use neb::ram::types::Id; 7 | use std::future; 8 | use std::sync::Arc; 9 | 10 | use super::super::id_list::IdList; 11 | use super::{EdgeAttributes, EdgeError, TEdge}; 12 | use crate::server::schema::{GraphSchema, SchemaContainer}; 13 | 14 | use rand::prelude::*; 15 | 16 | pub trait BilateralEdge: TEdge + Sync + Send { 17 | fn vertex_a_field() -> u64; 18 | fn vertex_b_field() -> u64; 19 | 20 | fn vertex_a(&self) -> &Id; 21 | fn vertex_b(&self) -> &Id; 22 | 23 | fn edge_a_field() -> u64; 24 | fn edge_b_field() -> u64; 25 | 26 | fn build_edge(a_field: Id, b_field: Id, schema_id: u32, cell: Option) -> Self::Edge; 27 | fn edge_cell(&self) -> &Option; 28 | fn schema_id(&self) -> u32; 29 | 30 | fn into_cell(self) -> Option; 31 | fn edge_cell_mut(&mut self) -> Option<&mut OwnedCell>; 32 | 33 | fn from_id<'a>( 34 | vertex_id: Id, 35 | edge_attrs: &'a EdgeAttributes, 36 | edge_schema_id: u32, 37 | txn: &'a Transaction, 38 | id: Id, 39 | ) -> BoxFuture<'a, Result, TxnError>> { 40 | // println!("getting edge from id: {:?}, schema_id: {:?}", id, schema_id); 41 | if edge_attrs.has_body { 42 | return txn 43 | .read(id) 44 | .map_ok(move |trace_cell| { 45 | let trace_cell = match trace_cell { 46 | Some(cell) => cell, 47 | None => return Err(EdgeError::CellNotFound), 48 | }; 49 | let mut a_id = Id::unit_id(); 50 | let mut b_id = Id::unit_id(); 51 | let edge_cell = if edge_attrs.edge_type == Self::edge_type() { 52 | if let (&OwnedValue::Id(e_a_id), &OwnedValue::Id(e_b_id)) = ( 53 | &trace_cell.data[Self::edge_a_field()], 54 | &trace_cell.data[Self::edge_b_field()], 55 | ) { 56 | a_id = e_a_id; 57 | b_id = e_b_id; 58 | } 59 | Some(trace_cell) 60 | } else { 61 | return 
Err(EdgeError::WrongEdgeType); 62 | }; 63 | Ok(Self::build_edge(a_id, b_id, edge_schema_id, edge_cell)) 64 | }) 65 | .boxed(); 66 | } else { 67 | let a_id = vertex_id; 68 | let b_id = id; 69 | let res = Ok(Ok(Self::build_edge(a_id, b_id, edge_schema_id, None))); 70 | future::ready(res).boxed() 71 | } 72 | } 73 | 74 | fn link<'a>( 75 | vertex_a_id: Id, 76 | vertex_b_id: Id, 77 | body: &'a Option, 78 | txn: &'a Transaction, 79 | schema_id: u32, 80 | edge_attrs: &'a EdgeAttributes, 81 | ) -> BoxFuture<'a, Result, TxnError>> { 82 | async move { 83 | let vertex_a_pointer; 84 | let vertex_b_pointer; 85 | let edge_cell = { 86 | if edge_attrs.edge_type != Self::edge_type() { 87 | return Ok(Err(EdgeError::WrongEdgeType)); 88 | } 89 | if edge_attrs.has_body { 90 | if let Some(body_map) = body { 91 | let edge_id_lower = { 92 | let mut rng = rand::rng(); 93 | rng.next_u64() 94 | }; 95 | let mut edge_body_cell = OwnedCell::new_with_id( 96 | schema_id, 97 | &Id::new(vertex_a_id.higher, edge_id_lower), 98 | OwnedValue::Map(body_map.owned()), 99 | ); 100 | let edge_body_id = edge_body_cell.id(); 101 | edge_body_cell.data[Self::edge_a_field()] = OwnedValue::Id(vertex_a_id); 102 | edge_body_cell.data[Self::edge_b_field()] = OwnedValue::Id(vertex_b_id); 103 | txn.write(edge_body_cell.clone()).await?; 104 | vertex_a_pointer = edge_body_id; 105 | vertex_b_pointer = edge_body_id; 106 | Some(edge_body_cell) 107 | } else { 108 | return Ok(Err(EdgeError::NormalEdgeShouldHaveBody)); 109 | } 110 | } else { 111 | if body.is_none() { 112 | vertex_a_pointer = vertex_b_id; 113 | vertex_b_pointer = vertex_a_id; 114 | None 115 | } else { 116 | return Ok(Err(EdgeError::SimpleEdgeShouldNotHaveBody)); 117 | } 118 | } 119 | }; 120 | // Add vertex_a_pointer to vertex_a's id list 121 | let a_result = 122 | IdList::from_txn_and_container(txn, vertex_a_id, Self::vertex_a_field(), schema_id) 123 | .add(&vertex_a_pointer) 124 | .await? 
125 | .map_err(EdgeError::IdListError); 126 | if let Err(e) = a_result { 127 | return Ok(Err(e)); 128 | } 129 | 130 | // Add vertex_b_pointer to vertex_b's id list 131 | let b_result = 132 | IdList::from_txn_and_container(txn, vertex_b_id, Self::vertex_b_field(), schema_id) 133 | .add(&vertex_b_pointer) 134 | .await? 135 | .map_err(EdgeError::IdListError); 136 | if let Err(e) = b_result { 137 | return Ok(Err(e)); 138 | } 139 | 140 | Ok(Ok(Self::build_edge( 141 | vertex_a_id, 142 | vertex_b_id, 143 | schema_id, 144 | edge_cell, 145 | ))) 146 | } 147 | .boxed() 148 | } 149 | 150 | fn remove<'a>( 151 | &'a self, 152 | txn: &'a Transaction, 153 | ) -> BoxFuture<'a, Result, TxnError>> { 154 | async move { 155 | let (v_a_removal, v_b_removal) = match self.edge_cell() { 156 | &Some(ref cell) => { 157 | txn.remove(cell.id()).await?; 158 | (cell.id(), cell.id()) 159 | } 160 | &None => (*self.vertex_b(), *self.vertex_a()), 161 | }; 162 | match IdList::from_txn_and_container( 163 | txn, 164 | *self.vertex_a(), 165 | Self::vertex_a_field(), 166 | self.schema_id(), 167 | ) 168 | .remove(&v_a_removal, false) 169 | .await? 170 | .map_err(EdgeError::IdListError) 171 | { 172 | Err(e) => return Ok(Err(e)), 173 | _ => {} 174 | } 175 | match IdList::from_txn_and_container( 176 | txn, 177 | *self.vertex_b(), 178 | Self::vertex_b_field(), 179 | self.schema_id(), 180 | ) 181 | .remove(&v_b_removal, false) 182 | .await? 
183 | .map_err(EdgeError::IdListError) 184 | { 185 | Err(e) => return Ok(Err(e)), 186 | _ => {} 187 | } 188 | Ok(Ok(())) 189 | } 190 | .boxed() 191 | } 192 | 193 | fn oppisite_vertex_id(&self, vertex_id: &Id) -> Option<&Id> { 194 | let v1_id = self.vertex_a(); 195 | let v2_id = self.vertex_b(); 196 | if v1_id == vertex_id { 197 | Some(v2_id) 198 | } else if v2_id == vertex_id { 199 | Some(v1_id) 200 | } else { 201 | None 202 | } 203 | } 204 | 205 | fn vertex_ids(&self) -> (Id, Id) { 206 | (*self.vertex_a(), *self.vertex_b()) 207 | } 208 | 209 | fn cell_id(&self) -> Option { 210 | self.edge_cell().as_ref().map(|cell| cell.id()) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/graph/edge/directed.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::Type; 2 | use neb::ram::cell::OwnedCell; 3 | use neb::ram::schema::Field; 4 | use neb::ram::types::Id; 5 | use neb::ram::types::OwnedValue; 6 | 7 | use super::bilateral::BilateralEdge; 8 | use super::{EdgeType, TEdge}; 9 | use crate::graph::fields::*; 10 | 11 | lazy_static! 
{
    pub static ref EDGE_TEMPLATE: Vec<Field> = vec![
        Field::new_unindexed(&*INBOUND_NAME, Type::Id),
        Field::new_unindexed(&*OUTBOUND_NAME, Type::Id),
    ];
}

/// A directed edge between two vertices, optionally backed by a body cell.
// NOTE(review): the mapping looks inverted — `vertex_a_field` is OUTBOUND yet
// `vertex_a()` returns `inbound_id` (and vice versa). It is internally
// consistent with how `build_edge` assigns its arguments, so behavior is
// preserved here, but the field names deserve a second look — TODO confirm.
#[derive(Debug)]
pub struct DirectedEdge {
    inbound_id: Id,
    outbound_id: Id,
    schema_id: u32,
    pub cell: Option<OwnedCell>,
}

impl TEdge for DirectedEdge {
    type Edge = DirectedEdge;
    fn edge_type() -> EdgeType {
        EdgeType::Directed
    }
}

impl BilateralEdge for DirectedEdge {
    fn vertex_a_field() -> u64 {
        *OUTBOUND_KEY_ID
    }

    fn vertex_b_field() -> u64 {
        *INBOUND_KEY_ID
    }

    fn vertex_a(&self) -> &Id {
        &self.inbound_id
    }

    fn vertex_b(&self) -> &Id {
        &self.outbound_id
    }

    fn edge_a_field() -> u64 {
        *INBOUND_KEY_ID
    }

    fn edge_b_field() -> u64 {
        *OUTBOUND_KEY_ID
    }

    fn build_edge(a_field: Id, b_field: Id, schema_id: u32, cell: Option<OwnedCell>) -> Self::Edge {
        DirectedEdge {
            inbound_id: a_field,
            outbound_id: b_field,
            schema_id,
            cell,
        }
    }

    fn edge_cell(&self) -> &Option<OwnedCell> {
        &self.cell
    }

    fn edge_cell_mut(&mut self) -> Option<&mut OwnedCell> {
        self.cell.as_mut()
    }

    fn schema_id(&self) -> u32 {
        self.schema_id
    }

    fn into_cell(self) -> Option<OwnedCell> {
        self.cell
    }
}

/// Placeholder for a directed hyper-edge (many-to-many); not yet wired in.
pub struct DirectedHyperEdge {
    inbound_ids: Vec<Id>,
    outbound_ids: Vec<Id>,
    cell: OwnedCell,
}

edge_index!(DirectedEdge);
--------------------------------------------------------------------------------
/src/graph/edge/hyper.rs:
--------------------------------------------------------------------------------
use neb::ram::schema::Field;
use neb::ram::types::Type;

lazy_static!
{
    pub static ref EDGE_TEMPLATE: Vec<Field> =
        vec![Field::new_unindexed_array("_vertices", Type::Id,)];
}
--------------------------------------------------------------------------------
/src/graph/edge/macros.rs:
--------------------------------------------------------------------------------
/// Implements `Index`/`IndexMut` (by field id and by field name) for an edge
/// type with an `Option<OwnedCell>` field named `cell`. Reads on a body-less
/// edge yield `OwnedValue::Null`; writes on a body-less edge panic.
#[macro_export]
macro_rules! edge_index {
    ($struc: ident) => {
        use std::ops::{Index, IndexMut};
        impl Index<u64> for $struc {
            type Output = OwnedValue;
            fn index(&self, index: u64) -> &Self::Output {
                if let Some(ref cell) = self.cell {
                    &cell[index]
                } else {
                    &OwnedValue::Null
                }
            }
        }

        impl<'a> Index<&'a str> for $struc {
            type Output = OwnedValue;
            fn index(&self, index: &'a str) -> &Self::Output {
                if let Some(ref cell) = self.cell {
                    &cell[index]
                } else {
                    &OwnedValue::Null
                }
            }
        }

        impl<'a> IndexMut<&'a str> for $struc {
            fn index_mut(&mut self, index: &'a str) -> &mut Self::Output {
                if let &mut Some(ref mut cell) = &mut self.cell {
                    &mut cell[index]
                } else {
                    panic!("this edge have no cell");
                }
            }
        }

        impl IndexMut<u64> for $struc {
            fn index_mut(&mut self, index: u64) -> &mut Self::Output {
                if let &mut Some(ref mut cell) = &mut self.cell {
                    &mut cell[index]
                } else {
                    panic!("this edge have no cell");
                }
            }
        }
    };
}
--------------------------------------------------------------------------------
/src/graph/edge/mod.rs:
--------------------------------------------------------------------------------
#[macro_use]
mod macros;

pub mod bilateral;
pub mod directed;
pub mod hyper;
pub mod undirectd;

use super::id_list::IdListError;
use crate::graph::edge::bilateral::BilateralEdge;
use crate::server::schema::{GraphSchema, SchemaContainer};
use dovahkiin::types::OwnedValue;
use neb::client::transaction::{Transaction, TxnError};
14 | use neb::ram::cell::OwnedCell; 15 | use neb::ram::types::Id; 16 | use std::ops::{Index, IndexMut}; 17 | use std::sync::Arc; 18 | 19 | #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone, Copy)] 20 | pub enum EdgeType { 21 | Directed, 22 | Undirected, 23 | } 24 | 25 | #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone, Copy)] 26 | pub struct EdgeAttributes { 27 | pub edge_type: EdgeType, 28 | pub has_body: bool, 29 | } 30 | 31 | impl EdgeAttributes { 32 | pub fn new(edge_type: EdgeType, has_body: bool) -> EdgeAttributes { 33 | EdgeAttributes { 34 | edge_type: edge_type, 35 | has_body: has_body, 36 | } 37 | } 38 | } 39 | 40 | #[derive(Debug, Serialize, Deserialize)] 41 | pub enum EdgeError { 42 | WrongSchema, 43 | CannotFindSchema, 44 | CellNotFound, 45 | WrongVertexField, 46 | WrongEdgeType, 47 | IdListError(IdListError), 48 | SimpleEdgeShouldNotHaveBody, 49 | NormalEdgeShouldHaveBody, 50 | FilterEvalError(String), 51 | } 52 | 53 | pub trait TEdge: Send + Sync { 54 | type Edge: TEdge + 'static; 55 | fn edge_type() -> EdgeType; 56 | } 57 | 58 | #[derive(Debug)] 59 | pub enum Edge { 60 | Directed(directed::DirectedEdge), 61 | Undirected(undirectd::UndirectedEdge), 62 | } 63 | 64 | impl Edge { 65 | pub async fn remove(&self, txn: &Transaction) -> Result, TxnError> { 66 | match self { 67 | Edge::Directed(e) => e.remove(txn).await, 68 | Edge::Undirected(e) => e.remove(txn).await, 69 | } 70 | } 71 | 72 | pub async fn get_data(&self) -> &Option { 73 | match self { 74 | &Edge::Directed(ref e) => e.edge_cell(), 75 | &Edge::Undirected(ref e) => e.edge_cell(), 76 | } 77 | } 78 | 79 | pub async fn get_data_mut(&mut self) -> Option<&mut OwnedCell> { 80 | match self { 81 | &mut Edge::Directed(ref mut e) => e.edge_cell_mut(), 82 | &mut Edge::Undirected(ref mut e) => e.edge_cell_mut(), 83 | } 84 | } 85 | 86 | pub fn vertex_id_opposite(&self, vertex_id: &Id) -> Option<&Id> { 87 | match self { 88 | &Edge::Directed(ref e) => e.oppisite_vertex_id(vertex_id), 89 
| &Edge::Undirected(ref e) => e.oppisite_vertex_id(vertex_id), 90 | } 91 | } 92 | pub fn vertex_ids(&self) -> (Id, Id) { 93 | match self { 94 | &Edge::Directed(ref e) => e.vertex_ids(), 95 | &Edge::Undirected(ref e) => e.vertex_ids(), 96 | } 97 | } 98 | pub fn cell_id(&self) -> Option { 99 | match self { 100 | &Edge::Directed(ref e) => e.cell_id(), 101 | &Edge::Undirected(ref e) => e.cell_id(), 102 | } 103 | } 104 | 105 | pub fn edge_type(&self) -> EdgeType { 106 | match self { 107 | &Edge::Directed(_) => EdgeType::Directed, 108 | &Edge::Undirected(_) => EdgeType::Undirected, 109 | } 110 | } 111 | 112 | pub fn into_cell(self) -> Option { 113 | match self { 114 | Edge::Directed(e) => e.into_cell(), 115 | Edge::Undirected(e) => e.into_cell(), 116 | } 117 | } 118 | } 119 | 120 | impl Index for Edge { 121 | type Output = OwnedValue; 122 | fn index(&self, index: u64) -> &Self::Output { 123 | match self { 124 | &Edge::Directed(ref e) => &e[index], 125 | &Edge::Undirected(ref e) => &e[index], 126 | } 127 | } 128 | } 129 | 130 | impl<'a> Index<&'a str> for Edge { 131 | type Output = OwnedValue; 132 | fn index(&self, index: &'a str) -> &Self::Output { 133 | match self { 134 | &Edge::Directed(ref e) => &e[index], 135 | &Edge::Undirected(ref e) => &e[index], 136 | } 137 | } 138 | } 139 | 140 | impl<'a> IndexMut<&'a str> for Edge { 141 | fn index_mut(&mut self, index: &'a str) -> &mut Self::Output { 142 | match self { 143 | &mut Edge::Directed(ref mut e) => &mut e[index], 144 | &mut Edge::Undirected(ref mut e) => &mut e[index], 145 | } 146 | } 147 | } 148 | 149 | impl IndexMut for Edge { 150 | fn index_mut(&mut self, index: u64) -> &mut Self::Output { 151 | match self { 152 | &mut Edge::Directed(ref mut e) => &mut e[index], 153 | &mut Edge::Undirected(ref mut e) => &mut e[index], 154 | } 155 | } 156 | } 157 | 158 | pub async fn from_id( 159 | vertex_id: Id, 160 | edge_attrs: &EdgeAttributes, 161 | edge_schema_id: u32, 162 | txn: &Transaction, 163 | id: Id, 164 | ) -> Result, 
TxnError> { 165 | match edge_attrs.edge_type { 166 | EdgeType::Directed => { 167 | directed::DirectedEdge::from_id(vertex_id, edge_attrs, edge_schema_id, txn, id) 168 | .await 169 | .map(|r| r.map(Edge::Directed)) 170 | } 171 | EdgeType::Undirected => { 172 | undirectd::UndirectedEdge::from_id(vertex_id, edge_attrs, edge_schema_id, txn, id) 173 | .await 174 | .map(|r| r.map(Edge::Undirected)) 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/graph/edge/undirectd.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::Type; 2 | use neb::ram::cell::OwnedCell; 3 | use neb::ram::schema::Field; 4 | use neb::ram::types::OwnedValue; 5 | use neb::ram::types::{key_hash, Id}; 6 | 7 | use super::bilateral::BilateralEdge; 8 | use super::{EdgeType, TEdge}; 9 | use crate::graph::fields::*; 10 | 11 | lazy_static! { 12 | pub static ref EDGE_VERTEX_A_NAME: String = String::from("_vertex_a"); 13 | pub static ref EDGE_VERTEX_B_NAME: String = String::from("_vertex_b"); 14 | pub static ref EDGE_TEMPLATE: Vec = vec![ 15 | Field::new_unindexed(&*EDGE_VERTEX_A_NAME, Type::Id), 16 | Field::new_unindexed(&*EDGE_VERTEX_B_NAME, Type::Id), 17 | ]; 18 | pub static ref EDGE_VERTEX_A_ID: u64 = key_hash(&*EDGE_VERTEX_A_NAME); 19 | pub static ref EDGE_VERTEX_B_ID: u64 = key_hash(&*EDGE_VERTEX_B_NAME); 20 | } 21 | 22 | #[derive(Debug)] 23 | pub struct UndirectedEdge { 24 | vertex_a_id: Id, 25 | vertex_b_id: Id, 26 | schema_id: u32, 27 | cell: Option, 28 | } 29 | 30 | impl TEdge for UndirectedEdge { 31 | type Edge = UndirectedEdge; 32 | fn edge_type() -> EdgeType { 33 | EdgeType::Undirected 34 | } 35 | } 36 | 37 | impl BilateralEdge for UndirectedEdge { 38 | fn vertex_a_field() -> u64 { 39 | *UNDIRECTED_KEY_ID 40 | } 41 | 42 | fn vertex_b_field() -> u64 { 43 | *UNDIRECTED_KEY_ID 44 | } 45 | 46 | fn vertex_a(&self) -> &Id { 47 | &self.vertex_a_id 48 | } 49 | 50 | fn vertex_b(&self) 
-> &Id { 51 | &self.vertex_b_id 52 | } 53 | 54 | fn edge_a_field() -> u64 { 55 | *EDGE_VERTEX_A_ID 56 | } 57 | 58 | fn edge_b_field() -> u64 { 59 | *EDGE_VERTEX_B_ID 60 | } 61 | 62 | fn build_edge(a_field: Id, b_field: Id, schema_id: u32, cell: Option) -> Self::Edge { 63 | UndirectedEdge { 64 | vertex_a_id: a_field, 65 | vertex_b_id: b_field, 66 | schema_id: schema_id, 67 | cell: cell, 68 | } 69 | } 70 | 71 | fn edge_cell(&self) -> &Option { 72 | &self.cell 73 | } 74 | fn schema_id(&self) -> u32 { 75 | self.schema_id 76 | } 77 | 78 | fn into_cell(self) -> Option { 79 | self.cell 80 | } 81 | 82 | fn edge_cell_mut(&mut self) -> Option<&mut OwnedCell> { 83 | self.cell.as_mut() 84 | } 85 | } 86 | 87 | edge_index!(UndirectedEdge); 88 | -------------------------------------------------------------------------------- /src/graph/fields.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::Type; 2 | use neb::ram::schema::Field; 3 | use neb::ram::types::key_hash; 4 | 5 | pub const INBOUND_KEY: &'static str = "_inbound"; 6 | pub const OUTBOUND_KEY: &'static str = "_outbound"; 7 | pub const UNDIRECTED_KEY: &'static str = "_undirected"; 8 | 9 | lazy_static! 
{
    pub static ref INBOUND_NAME: String = String::from(INBOUND_KEY);
    pub static ref OUTBOUND_NAME: String = String::from(OUTBOUND_KEY);
    pub static ref UNDIRECTED_NAME: String = String::from(UNDIRECTED_KEY);
    pub static ref VERTEX_TEMPLATE: Vec<Field> = vec![
        Field::new_unindexed(&*OUTBOUND_NAME, Type::Id),
        Field::new_unindexed(&*INBOUND_NAME, Type::Id),
        Field::new_unindexed(&*UNDIRECTED_NAME, Type::Id),
    ];
    pub static ref INBOUND_KEY_ID: u64 = key_hash(&*INBOUND_NAME);
    pub static ref OUTBOUND_KEY_ID: u64 = key_hash(&*OUTBOUND_NAME);
    pub static ref UNDIRECTED_KEY_ID: u64 = key_hash(&*UNDIRECTED_NAME);
}
--------------------------------------------------------------------------------
/src/graph/partitioner/mod.rs:
--------------------------------------------------------------------------------
use ahash::AHashMap;
use std::{collections::HashMap, collections::HashSet, hash::Hasher};

use dovahkiin::{
    ahash::AHasher,
    types::{Id, OwnedValue},
};
use neb::ram::types::{RandId, RandValue};

use super::{local::LocalGraph, vertex::Vertex};

pub mod vector;
#[cfg(test)]
mod vector_test;

use vector::*;

/// Trait for determining the partition key for vertices.
/// The partition key is stored in the `higher` half of an `Id`; a value of 0
/// is treated throughout as "not yet partitioned".
pub trait Partitioner: Clone {
    /// Calculate a partition key based on one or two vertices
    fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64;

    /// Assign partition keys to all unpartitioned (higher == 0) vertices and
    /// edges of an in-memory graph, in place.
    fn partition_local_graph(&self, graph: &mut LocalGraph);
}

/// Partitions based on copying the partition key from an adjacent (neighbor) vertex
#[derive(Clone, Copy)]
pub struct SameAsNeighbourPartitioner;

impl Partitioner for SameAsNeighbourPartitioner {
    fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64 {
        v2.map_or(0, |v| v.id().higher)
    }

    fn partition_local_graph(&self, graph: &mut LocalGraph) {
        if graph.vertices().is_empty() {
            return;
        }
        let mut counter = AHashMap::new();
        let mut vertex_updates = AHashMap::new();

        // Count partitions and identify vertices that need repartitioning
        for (vid, _) in graph.vertices() {
            let partition = vid.higher;
            if partition == 0 {
                vertex_updates.insert(*vid, 0u64); // Placeholder, will be updated with actual partition
                continue;
            }
            *counter.entry(partition).or_insert(0) += 1;
        }

        // Determine the target partition (most common or random if none)
        let partition = if !counter.is_empty() {
            *counter.iter().max_by_key(|(_, count)| *count).unwrap().0
        } else {
            Id::rand_lower().higher
        };

        // Update the placeholder values with the actual partition
        for (_, part) in vertex_updates.iter_mut() {
            *part = partition;
        }

        // Prepare edge updates
        let mut edge_updates = AHashMap::new();
        for (edge_id, edge) in graph.edges() {
            if edge_id.higher == 0 {
                // Assign edges to the same partition as their vertices
                edge_updates.insert(*edge_id, edge.vertex_a.higher);
            }
        }

        // Apply all updates at once using the update_ids method
        graph.update_ids(&vertex_updates, &edge_updates);
    }
}

/// Partitions based on a specific field from the vertex
#[derive(Clone, Copy)]
pub struct FromFieldPartitioner {
    field_id: u64,
}

impl FromFieldPartitioner {
    pub fn new(field_id: u64) -> Self {
        Self { field_id }
    }
}

impl Partitioner for FromFieldPartitioner {
    fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64 {
        v1.map_or(0, |v| self.hash_field_value(v.schema(), &v.cell.data))
    }

    fn partition_local_graph(&self, graph: &mut LocalGraph) {
        if graph.vertices().is_empty() {
            return;
        }
        // Get all vertices that need to be repartitioned
        let mut vertex_updates = AHashMap::new();

        // First pass: identify vertices that need repartitioning
        for (vertex_id, vertex) in graph.vertices() {
            let partition = self.hash_field_value(vertex.cell.header.schema, &vertex.cell.data);
            if vertex_id.higher == 0 {
                vertex_updates.insert(*vertex_id, partition);
            }
        }

        // Update edge partitions and references
        // NOTE(review): edges read `edge.vertex_a.higher` before vertex
        // updates are applied — presumably `update_ids` resolves this; verify.
        let mut edge_updates = AHashMap::new();
        for (edge_id, edge) in graph.edges() {
            if edge_id.higher == 0 {
                edge_updates.insert(*edge_id, edge.vertex_a.higher);
            }
        }

        graph.update_ids(&vertex_updates, &edge_updates);
    }
}

impl FromFieldPartitioner {
    /// Hash (schema id, field id, field value) into a partition key.
    fn hash_field_value(&self, schema_id: u32, value: &OwnedValue) -> u64 {
        let hash_bytes = value[self.field_id].hash();
        let mut hasher = AHasher::default();
        hasher.write_u32(schema_id);
        hasher.write_u64(self.field_id);
        hasher.write(&hash_bytes);
        hasher.finish()
    }
}

/// Partitions by selecting a random partition key
#[derive(Clone, Copy)]
pub struct RandomPartitioner;

impl Partitioner for RandomPartitioner {
    fn partition_key(&self, _v1: Option<&Vertex>, _v2: Option<&Vertex>) -> u64 {
        random_partition_key()
    }

    fn partition_local_graph(&self, graph: &mut LocalGraph) {
        if graph.vertices().is_empty() {
            return;
        }
        let mut vertex_updates = AHashMap::new();
        for (vertex_id, _vertex) in graph.vertices() {
            if vertex_id.higher == 0 {
                vertex_updates.insert(*vertex_id, random_partition_key());
            }
        }
        let mut edge_updates = AHashMap::new();
        for (edge_id, edge) in graph.edges() {
            if edge_id.higher == 0 {
                edge_updates.insert(*edge_id, edge.vertex_a.higher);
            }
        }
        graph.update_ids(&vertex_updates, &edge_updates);
    }
}

// /// Partitions using a user-supplied function
// #[derive(Clone)]
// pub struct CustomPartitioner {
//     func: Box<dyn Fn(Option<&Vertex>, Option<&Vertex>) -> u64 + Send + Sync + 'static + Clone>,
// }
// impl CustomPartitioner {
//     pub fn new<F>(func: F) -> Self
//     where
//         F: Fn(Option<&Vertex>, Option<&Vertex>) -> u64 + 'static,
//     {
//         Self { func: Box::new(func) }
//     }
// }

// impl Partitioner for CustomPartitioner {
//     fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64 {
//         (self.func)(v1, v2)
//     }
// }

fn random_partition_key() -> u64 {
    Id::rand().higher
}

/// Places everything in partition 0; useful for single-node setups and tests.
#[derive(Clone, Copy)]
pub struct DefaultPartitioner;

impl Partitioner for DefaultPartitioner {
    fn partition_key(&self, _v1: Option<&Vertex>, _v2: Option<&Vertex>) -> u64 {
        0
    }

    fn partition_local_graph(&self, _graph: &mut LocalGraph) {}
}
--------------------------------------------------------------------------------
/src/graph/partitioner/vector.rs:
--------------------------------------------------------------------------------
// A vector partitioner that uses LSH with cached projection vectors
// and then converts the resulting binary hash into Gray code.

use ahash::AHashMap;
use dovahkiin::types::{OwnedPrimArray, OwnedValue};
use neb::ram::cell::OwnedCell;
use once_cell::sync::Lazy;
use rand::prelude::*;
use rand_distr::{Distribution, Normal};
use std::sync::RwLock;

use crate::graph::vertex::Vertex;

use super::{random_partition_key, Partitioner};

const NUM_BITS: usize = 64;

// Global cache storing a native Vec<Vec<f32>> for the current dimension.
19 | // The Option holds (dimension, projection_vectors), where projection_vectors 20 | // is a Vec of projection vectors (each is a Vec), and the number of vectors equals the maximum requested bits so far. 21 | static PROJECTION_CACHE: Lazy>)>>> = 22 | Lazy::new(|| RwLock::new(None)); 23 | 24 | /// Deterministically generate `n_bits` projection vectors of dimension `dim` using a fixed seed. 25 | /// Returns a Vec of projection vectors (each vector is a Vec of length `dim`). 26 | fn generate_deterministic_vectors(dim: usize, n_bits: usize) -> Vec> { 27 | let seed: [u8; 32] = [42; 32]; // Fixed seed for determinism. 28 | let mut rng = rand::rngs::StdRng::from_seed(seed); 29 | let normal = Normal::new(0.0, 1.0).unwrap(); 30 | (0..n_bits) 31 | .map(|_| { 32 | (0..dim) 33 | .map(|_| normal.sample(&mut rng)) 34 | .collect::>() 35 | }) 36 | .collect() 37 | } 38 | 39 | /// Retrieves projection vectors for the given dimension and requested number of bits as a native Vec>. 40 | /// If the global cache is already populated for the same dimension and has at least `requested_bits` vectors, 41 | /// returns the first `requested_bits` vectors (cloned). Otherwise, regenerates and caches the new set. 42 | fn get_projection_vectors(dim: usize, requested_bits: usize) -> Vec> { 43 | { 44 | let cache = PROJECTION_CACHE.read().unwrap(); 45 | if let Some((cached_dim, ref vectors)) = *cache { 46 | if cached_dim == dim && vectors.len() >= requested_bits { 47 | // Return the first `requested_bits` vectors. 48 | return vectors[..requested_bits].to_vec(); 49 | } 50 | } 51 | } 52 | // Acquire a write lock to update the cache. 53 | let mut cache = PROJECTION_CACHE.write().unwrap(); 54 | // Regenerate the projection vectors for this dimension with the requested number of bits. 55 | let new_vectors = generate_deterministic_vectors(dim, requested_bits); 56 | *cache = Some((dim, new_vectors.clone())); 57 | new_vectors 58 | } 59 | 60 | /// Convert a binary number to its Gray code representation. 
61 | fn binary_to_gray(n: u64) -> u64 { 62 | n ^ (n >> 1) 63 | } 64 | 65 | /// Normalize a vector to unit length 66 | fn normalize_vector(vector: &[f32]) -> Vec { 67 | let norm: f32 = vector.iter().map(|x| x * x).sum::().sqrt(); 68 | if norm == 0.0 { 69 | return vector.to_vec(); 70 | } 71 | vector.iter().map(|x| x / norm).collect() 72 | } 73 | 74 | /// A partitioner that computes the partition key based on the vertex's vector 75 | /// using LSH with cached projection vectors (stored natively as Vec>) 76 | /// and then converts the resulting binary hash into Gray code. 77 | #[derive(Debug, Clone)] 78 | pub struct FromVectorPartitioner { 79 | field_id: u64, 80 | } 81 | 82 | impl FromVectorPartitioner { 83 | /// Creates a new partitioner for the given field, dimension, and number of bits. 84 | pub fn new(field_id: u64) -> Self { 85 | Self { field_id } 86 | } 87 | } 88 | 89 | impl Partitioner for FromVectorPartitioner { 90 | /// Computes the partition key from the vertex's vector. 91 | /// It uses the globally cached projection vectors (extending them if needed) 92 | /// to compute a binary hash (by taking dot products) and then converts that hash into Gray code. 93 | fn partition_key(&self, v1: Option<&Vertex>, _v2: Option<&Vertex>) -> u64 { 94 | if let Some(vertex) = v1 { 95 | self.vertex_partition_key(&vertex.cell) 96 | } else { 97 | // Fallback: if no vertex is provided, return a constant. 
98 | 0 99 | } 100 | } 101 | 102 | fn partition_local_graph(&self, graph: &mut crate::graph::local::LocalGraph) { 103 | if graph.vertices().is_empty() { 104 | return; 105 | } 106 | let mut vertex_updates = AHashMap::new(); 107 | for (vertex_id, _vertex) in graph.vertices() { 108 | if vertex_id.higher == 0 { 109 | vertex_updates.insert(*vertex_id, self.vertex_partition_key(&_vertex.cell)); 110 | } 111 | } 112 | let mut edge_updates = AHashMap::new(); 113 | for (edge_id, edge) in graph.edges() { 114 | if edge_id.higher == 0 { 115 | edge_updates.insert(*edge_id, edge.vertex_a.higher); 116 | } 117 | } 118 | graph.update_ids(&vertex_updates, &edge_updates); 119 | } 120 | } 121 | 122 | impl FromVectorPartitioner { 123 | pub fn vertex_partition_key(&self, cell: &OwnedCell) -> u64 { 124 | let field = &cell.data[self.field_id]; 125 | match field { 126 | OwnedValue::PrimArray(prim_array) => array_partition_key(prim_array), 127 | _ => { 128 | return 0; 129 | } 130 | } 131 | } 132 | } 133 | 134 | pub fn array_partition_key(array: &OwnedPrimArray) -> u64 { 135 | let vector = match array { 136 | OwnedPrimArray::F32(vector) => vector.clone(), 137 | OwnedPrimArray::F64(vector) => vector.iter().map(|f| *f as f32).collect(), 138 | OwnedPrimArray::U8(vector) => vector.iter().map(|f| *f as f32).collect(), 139 | OwnedPrimArray::U16(vector) => vector.iter().map(|f| *f as f32).collect(), 140 | OwnedPrimArray::U32(vector) => vector.iter().map(|f| *f as f32).collect(), 141 | OwnedPrimArray::U64(vector) => vector.iter().map(|f| *f as f32).collect(), 142 | OwnedPrimArray::I8(vector) => vector.iter().map(|f| *f as f32).collect(), 143 | OwnedPrimArray::I16(vector) => vector.iter().map(|f| *f as f32).collect(), 144 | OwnedPrimArray::I32(vector) => vector.iter().map(|f| *f as f32).collect(), 145 | OwnedPrimArray::I64(vector) => vector.iter().map(|f| *f as f32).collect(), 146 | _ => { 147 | return 0; 148 | } 149 | }; 150 | let normalized_vector = normalize_vector(&vector); 151 | let projections = 
get_projection_vectors(normalized_vector.len(), NUM_BITS); 152 | let mut hash: u64 = 0; 153 | for (i, proj) in projections.iter().enumerate() { 154 | let dot: f32 = normalized_vector 155 | .iter() 156 | .zip(proj.iter()) 157 | .map(|(a, b)| a * b) 158 | .sum(); 159 | if dot > 0.0 { 160 | hash |= 1 << i; 161 | } 162 | } 163 | binary_to_gray(hash) 164 | } 165 | -------------------------------------------------------------------------------- /src/graph/partitioner/vector_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use bifrost_hasher::hash_str; 3 | use dovahkiin::data_map; 4 | use dovahkiin::types::Map; 5 | use dovahkiin::types::OwnedPrimArray; 6 | use neb::ram::cell::OwnedCell; 7 | use neb::ram::types::OwnedValue; 8 | 9 | const VECTOR_FIELD: &str = "v"; 10 | lazy_static! { 11 | static ref VECTOR_FIELD_ID: u64 = hash_str(VECTOR_FIELD); 12 | } 13 | 14 | fn create_test_vector(values: &[f32]) -> OwnedCell { 15 | let mut cell = OwnedCell::default(); 16 | cell.data = OwnedValue::Map(data_map! 
{
        v: OwnedValue::PrimArray(OwnedPrimArray::F32(values.to_vec()))
    });
    cell
}

/// Cosine similarity of two equal-length vectors (helper for manual checks).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot_product / (norm_a * norm_b)
}

/// Absolute difference between two partition numbers.
fn partition_number_distance(a: u64, b: u64) -> u64 {
    if a > b {
        a - b
    } else {
        b - a
    }
}

#[test]
fn test_similar_vectors_get_similar_partitions() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    let vs1 = [1.0, 2.0, 3.0, 4.0];
    let vs2 = [1.01, 2.02, 3.03, 4.04];

    // Create two very similar vectors
    let v1 = create_test_vector(&vs1);
    let v2 = create_test_vector(&vs2);

    let vertex1 = Vertex { cell: v1 };
    let vertex2 = Vertex { cell: v2 };

    let p1 = partitioner.partition_key(Some(&vertex1), None);
    let p2 = partitioner.partition_key(Some(&vertex2), None);

    // Similar vectors should get numerically close partition numbers
    let distance = partition_number_distance(p1, p2);
    assert!(
        distance < 10,
        "Similar vectors got too distant partition numbers: {} vs {}",
        p1,
        p2
    );
}

#[test]
fn test_vector_similarity_vs_partition_distance() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    // Create a base vector
    let base_vector = create_test_vector(&[1.0, 0.0, 0.0, 0.0]);
    let base_vertex = Vertex { cell: base_vector };
    let base_partition = partitioner.partition_key(Some(&base_vertex), None);

    // Test vectors with varying similarity to base; partition distance is
    // expected to grow (weakly) as similarity drops.
    // NOTE(review): LSH only preserves order statistically — this monotonicity
    // assertion is tied to the fixed projection seed and may be fragile.
    let test_vectors = vec![
        create_test_vector(&[0.99, 0.01, 0.0, 0.0]), // Very similar
        create_test_vector(&[0.9, 0.1, 0.0, 0.0]),   // Somewhat similar
        create_test_vector(&[0.7, 0.3, 0.0, 0.0]),   // Less similar
        create_test_vector(&[-0.7, -1.0, 0.9, 1.0]), // Orthogonal
    ];
    let mut last_partition = 0;
    for test_vector in test_vectors {
        let test_vertex = Vertex { cell: test_vector };
        let test_partition = partitioner.partition_key(Some(&test_vertex), None);
        let partition_dist = partition_number_distance(base_partition, test_partition);
        println!("Partition distance: {}", partition_dist);
        assert!(last_partition <= partition_dist);
        last_partition = partition_dist;
    }
}

#[test]
fn test_boundary_conditions() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    // Test zero vector
    let zero_vector = create_test_vector(&[0.0, 0.0, 0.0, 0.0]);
    let zero_vertex = Vertex { cell: zero_vector };
    let zero_partition = partitioner.partition_key(Some(&zero_vertex), None);

    // Ensure zero vector gets a valid partition
    assert!(
        zero_partition <= u64::MAX,
        "Zero vector should get a valid partition"
    );

    // Test unit vectors along different dimensions
    let unit_vectors = vec![
        create_test_vector(&[1.0, 0.0, 0.0, 0.0]),
        create_test_vector(&[0.0, 1.0, 0.0, 0.0]),
        create_test_vector(&[0.0, 0.0, 1.0, 0.0]),
        create_test_vector(&[0.0, 0.0, 0.0, 1.0]),
    ];

    let mut unit_partitions = Vec::new();
    for unit_vector in unit_vectors {
        let unit_vertex = Vertex { cell: unit_vector };
        let unit_partition = partitioner.partition_key(Some(&unit_vertex), None);
        unit_partitions.push(unit_partition);
    }

    // Check that unit vectors get different partitions
    for i in 0..unit_partitions.len() {
        for j in i + 1..unit_partitions.len() {
            assert_ne!(
                unit_partitions[i], unit_partitions[j],
                "Different unit vectors should get different partitions"
            );
        }
    }

    // Test extreme values
    let extreme_vector = create_test_vector(&[f32::MAX, f32::MIN, f32::EPSILON, -0.0]);
    let extreme_vertex = Vertex {
        cell: extreme_vector,
    };
    let extreme_partition = partitioner.partition_key(Some(&extreme_vertex), None);
    assert!(
        extreme_partition <= u64::MAX,
        "Extreme values should get valid partitions"
    );

    // Test NaN handling (should be normalized properly)
    let nan_vector = create_test_vector(&[f32::NAN, 0.0, 0.0, 0.0]);
    let nan_vertex = Vertex { cell: nan_vector };
    let nan_partition = partitioner.partition_key(Some(&nan_vertex), None);
    assert!(
        nan_partition <= u64::MAX,
        "NaN values should be handled gracefully"
    );
}

#[test]
fn test_deterministic_behavior() {
    let partitioner1 = FromVectorPartitioner::new(*VECTOR_FIELD_ID);
    let partitioner2 = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    let test_vector = create_test_vector(&[1.0, 2.0, 3.0, 4.0]);
    let test_vertex = Vertex { cell: test_vector };

    let p1 = partitioner1.partition_key(Some(&test_vertex), None);
    let p2 = partitioner2.partition_key(Some(&test_vertex), None);

    assert_eq!(
        p1, p2,
        "Same vector should get same partition number across partitioner instances"
    );
}

#[test]
fn test_gray_code_properties() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    // NOTE(review): `partition + 1` is not in general the "next" Gray code, so
    // this is a loose sanity check rather than a true Gray-code property test.
    let test_vector = create_test_vector(&[1.0, 0.0, 0.0, 0.0]);
    let test_vertex = Vertex { cell: test_vector };

    let partition = partitioner.partition_key(Some(&test_vertex), None);
    let next_partition = partition + 1;

    let diff = partition ^ next_partition;
    assert!(
        diff.count_ones() <= 2,
        "Adjacent partition numbers should differ by at most two bits"
    );
}
-------------------------------------------------------------------------------- /src/graph/vertex/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::graph::edge; 2 | use crate::graph::id_list::{IdList, IdListError}; 3 | use crate::server::schema::{GraphSchema, SchemaContainer}; 4 | use dovahkiin::types::{OwnedMap, OwnedValue}; 5 | use neb::client::transaction::{Transaction, TxnError}; 6 | use neb::ram::cell::OwnedCell; 7 | use neb::ram::types::Id; 8 | 9 | use super::EdgeDirection; 10 | use std::ops::{Index, IndexMut}; 11 | use std::sync::Arc; 12 | 13 | #[derive(Debug, Clone, Default)] 14 | pub struct Vertex { 15 | pub cell: OwnedCell, 16 | } 17 | 18 | #[derive(Debug)] 19 | pub enum RemoveError { 20 | NotFound, 21 | FormatError, 22 | IdListError(IdListError), 23 | EdgeError(edge::EdgeError), 24 | } 25 | 26 | pub fn cell_to_vertex<'a>(cell: OwnedCell) -> Vertex { 27 | Vertex { cell } 28 | } 29 | 30 | pub fn vertex_to_cell<'a>(vertex: Vertex) -> OwnedCell { 31 | vertex.cell 32 | } 33 | 34 | impl Vertex { 35 | pub fn new(schema: u32, data: OwnedMap) -> Vertex { 36 | Self::new_with_id(Id::unit_id(), schema, data) 37 | } 38 | pub fn new_with_id(id: Id, schema: u32, data: OwnedMap) -> Vertex { 39 | Vertex { 40 | cell: OwnedCell::new_with_id(schema, &id, OwnedValue::Map(data)), 41 | } 42 | } 43 | pub fn schema(&self) -> u32 { 44 | self.cell.header.schema 45 | } 46 | } 47 | 48 | pub async fn txn_remove( 49 | txn: &Transaction, 50 | schemas: &Arc, 51 | vertex: V, 52 | ) -> Result, TxnError> 53 | where 54 | V: ToVertexId, 55 | { 56 | let id = &vertex.to_id(); 57 | match txn.head(*id).await? { 58 | Some(_head) => { 59 | let remove_field_lists = |id: Id, txn: Transaction, field_id: u64| { 60 | async move { 61 | let (type_list_id, schemas_ids) = 62 | match IdList::cell_types(&txn, id, field_id).await? 
{ 63 | Some(t) => t, 64 | None => { 65 | error!("Failed to get type list, {:?}", id); 66 | return Ok(Err(RemoveError::FormatError)); 67 | } 68 | }; 69 | for schema_id in schemas_ids { 70 | let mut id_list = 71 | IdList::from_txn_and_container(&txn, id, field_id, schema_id); 72 | { 73 | // remove edge cells 74 | let mut iter = match id_list.iter().await? { 75 | Ok(iter) => iter, 76 | Err(e) => return Ok(Err(RemoveError::IdListError(e))), 77 | }; 78 | let edge_attrs = match schemas.schema_type(schema_id) { 79 | Some(GraphSchema::Edge(ea)) => ea, 80 | _ => return Ok(Err(RemoveError::FormatError)), 81 | }; 82 | let edge_schema_id = schema_id; 83 | while let Some(edge_id) = iter.next().await { 84 | let edge = match edge::from_id( 85 | id, 86 | &edge_attrs, 87 | edge_schema_id, 88 | iter.segments.id_iter.txn, 89 | edge_id, 90 | ) 91 | .await? 92 | { 93 | Ok(edge) => edge, 94 | Err(e) => return Ok(Err(RemoveError::EdgeError(e))), 95 | }; 96 | match edge.remove(iter.segments.id_iter.txn).await? { 97 | Ok(()) => {} 98 | Err(e) => return Ok(Err(RemoveError::EdgeError(e))), 99 | } 100 | } 101 | } 102 | match id_list.clear_segments().await? { 103 | // remove segment cells 104 | Ok(()) => {} 105 | Err(e) => return Ok(Err(RemoveError::IdListError(e))), 106 | } 107 | } 108 | txn.remove(type_list_id).await?; // remove field schema list cell 109 | Ok(Ok(())) 110 | } 111 | }; 112 | match remove_field_lists(*id, txn.clone(), EdgeDirection::Undirected.as_field()).await? 113 | { 114 | Ok(()) => {} 115 | Err(e) => return Ok(Err(e)), 116 | } 117 | match remove_field_lists(*id, txn.clone(), EdgeDirection::Inbound.as_field()).await? { 118 | Ok(()) => {} 119 | Err(e) => return Ok(Err(e)), 120 | } 121 | match remove_field_lists(*id, txn.clone(), EdgeDirection::Outbound.as_field()).await? 
{ 122 | Ok(()) => {} 123 | Err(e) => return Ok(Err(e)), 124 | } 125 | txn.remove(*id).await.map(|_| Ok(())) // remove vertex cell 126 | } 127 | None => Ok(Err(RemoveError::NotFound)), 128 | } 129 | } 130 | 131 | pub async fn txn_update(txn: &Transaction, vertex: V, update: U) -> Result<(), TxnError> 132 | where 133 | V: ToVertexId, 134 | U: Fn(Vertex) -> Option, 135 | { 136 | let id = &vertex.to_id(); 137 | let update_cell = |cell| match update(cell_to_vertex(cell)) { 138 | Some(vertex) => Some(vertex_to_cell(vertex)), 139 | None => None, 140 | }; 141 | let cell = txn.read(*id).await?; 142 | match cell { 143 | Some(cell) => match update_cell(cell) { 144 | Some(cell) => txn.update(cell).await, 145 | None => txn.abort().await, 146 | }, 147 | None => txn.abort().await, 148 | } 149 | } 150 | 151 | impl Vertex { 152 | pub fn id(&self) -> Id { 153 | self.cell.id() 154 | } 155 | } 156 | 157 | pub trait ToVertexId { 158 | fn to_id(&self) -> Id; 159 | } 160 | 161 | impl ToVertexId for Vertex { 162 | fn to_id(&self) -> Id { 163 | self.cell.id() 164 | } 165 | } 166 | 167 | impl ToVertexId for Id { 168 | fn to_id(&self) -> Id { 169 | *self 170 | } 171 | } 172 | 173 | impl<'a> ToVertexId for &'a Id { 174 | fn to_id(&self) -> Id { 175 | **self 176 | } 177 | } 178 | 179 | impl<'a> ToVertexId for &'a Vertex { 180 | fn to_id(&self) -> Id { 181 | self.cell.id() 182 | } 183 | } 184 | 185 | impl<'a> Index for Vertex { 186 | type Output = OwnedValue; 187 | fn index(&self, index: u64) -> &Self::Output { 188 | &self.cell.data[index] 189 | } 190 | } 191 | 192 | impl<'a> Index<&'a str> for Vertex { 193 | type Output = OwnedValue; 194 | fn index(&self, index: &'a str) -> &Self::Output { 195 | &self.cell.data[index] 196 | } 197 | } 198 | 199 | impl<'a> IndexMut<&'a str> for Vertex { 200 | fn index_mut(&mut self, index: &'a str) -> &mut Self::Output { 201 | &mut self.cell[index] 202 | } 203 | } 204 | 205 | impl<'a> IndexMut for Vertex { 206 | fn index_mut(&mut self, index: u64) -> &mut 
Self::Output { 207 | &mut self.cell[index] 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/job/logger.rs: -------------------------------------------------------------------------------- 1 | use parking_lot::Mutex; 2 | use std::sync::Arc; 3 | use std::time::{SystemTime, UNIX_EPOCH}; 4 | 5 | use lightning::map::{Map, PtrHashMap}; 6 | use serde::{Deserialize, Serialize}; 7 | 8 | use crate::job::JobId; 9 | 10 | pub type JobLogger = PtrHashMap>>>; 11 | 12 | #[derive(Debug, Clone, Serialize, Deserialize)] 13 | pub struct JobLog { 14 | pub timestamp: u64, 15 | pub level: JobLogLevel, 16 | pub message: String, 17 | } 18 | 19 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] 20 | pub enum JobLogLevel { 21 | Error, 22 | Warning, 23 | Info, 24 | Trace, 25 | } 26 | 27 | pub fn append_job_log( 28 | job_logger: &Arc, 29 | job_id: JobId, 30 | level: JobLogLevel, 31 | message: String, 32 | ) { 33 | if cfg!(debug_assertions) && level != JobLogLevel::Trace { 34 | println!("[{:?} - {:?}] {}", job_id, level, message); 35 | return; 36 | } 37 | if level == JobLogLevel::Trace { 38 | return; 39 | } 40 | match job_logger.get(&job_id) { 41 | Some(job_logger) => { 42 | job_logger.lock().push(JobLog { 43 | timestamp: SystemTime::now() 44 | .duration_since(UNIX_EPOCH) 45 | .unwrap() 46 | .as_secs(), 47 | level, 48 | message, 49 | }); 50 | } 51 | None => { 52 | // error!( 53 | // "Failed to get job logger by job id: {:?}, inserting new job logger", 54 | // job_id 55 | // ); 56 | job_logger.try_insert(job_id, Arc::new(Mutex::new(Vec::new()))); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/job/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::traversal::navigation::App; 2 | use dovahkiin::types::Id; 3 | use parking_lot::Mutex; 4 | use serde::{Deserialize, Serialize}; 5 | use std::sync::Arc; 6 | 7 | pub mod 
logger; 8 | pub mod service; 9 | 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 11 | pub struct JobId { 12 | pub coordinator_server_id: u64, 13 | pub coordinator_job_id: u64, 14 | } 15 | 16 | #[derive(Debug, Clone, Serialize, Deserialize)] 17 | pub struct JobReport { 18 | pub status: JobStatus, 19 | pub result: Vec, 20 | } 21 | 22 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] 23 | pub enum JobStatus { 24 | Created, 25 | Initialized, 26 | Running, 27 | Finished, 28 | Failed, 29 | } 30 | 31 | pub struct Job { 32 | pub job_id: JobId, 33 | pub app: App, 34 | pub config: Vec, 35 | pub report: Arc>, 36 | } 37 | 38 | pub enum CollectiveResult { 39 | Found(Vec<(Id, u64)>), 40 | Continue(Vec<(Id, u64, f32)>), 41 | Error(Vec<(Id, u64, String)>), 42 | NotFound, 43 | } 44 | 45 | impl JobReport { 46 | pub fn new() -> Self { 47 | Self { 48 | status: JobStatus::Created, 49 | result: vec![], 50 | } 51 | } 52 | 53 | pub fn update_status(&mut self, status: JobStatus) { 54 | self.status = status; 55 | } 56 | 57 | pub fn update_findings(&mut self, findings: Vec) { 58 | self.result = findings; 59 | } 60 | } 61 | 62 | pub fn initialize_job_run(report: &Arc>) { 63 | report.lock().update_status(JobStatus::Running); 64 | } 65 | 66 | impl JobId { 67 | pub fn new(coordinator_server_id: u64, coordinator_job_id: u64) -> Self { 68 | Self { 69 | coordinator_server_id, 70 | coordinator_job_id, 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/job/service.rs: -------------------------------------------------------------------------------- 1 | use bifrost::{dispatch_rpc_service_functions, service}; 2 | use futures::future::BoxFuture; 3 | use lightning::map::Map; 4 | use parking_lot::Mutex; 5 | 6 | use super::{ 7 | logger::{JobLog, JobLogger}, 8 | JobId, 9 | }; 10 | 11 | service! 
{ 12 | rpc init_node_logger(job_id: JobId) -> Result<(), String>; 13 | rpc get_node_logs(job_id: JobId) -> Option>; 14 | rpc retire_node_logger(job_id: JobId) -> Result<(), String>; 15 | } 16 | 17 | pub struct JobService { 18 | pub job_logger: Arc, 19 | } 20 | 21 | impl Service for JobService { 22 | fn init_node_logger<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, Result<(), String>> { 23 | self.job_logger 24 | .insert_no_rt(job_id, Arc::new(Mutex::new(Vec::new()))); 25 | future::ready(Ok(())).boxed() 26 | } 27 | 28 | fn get_node_logs<'a>( 29 | &'a self, 30 | job_id: JobId, 31 | ) -> ::futures::future::BoxFuture<'a, Option>> { 32 | future::ready( 33 | self.job_logger 34 | .get(&job_id) 35 | .map(|logger| logger.lock().clone()), 36 | ) 37 | .boxed() 38 | } 39 | 40 | fn retire_node_logger<'a>( 41 | &'a self, 42 | job_id: JobId, 43 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 44 | self.job_logger.remove(&job_id); 45 | future::ready(Ok(())).boxed() 46 | } 47 | } 48 | 49 | impl JobService { 50 | pub fn logger(&self) -> &Arc { 51 | &self.job_logger 52 | } 53 | } 54 | 55 | dispatch_rpc_service_functions!(JobService); 56 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(proc_macro)] 2 | #![feature(conservative_impl_trait)] 3 | 4 | extern crate neb; 5 | #[macro_use] 6 | extern crate lazy_static; 7 | extern crate bifrost; 8 | extern crate bifrost_hasher; 9 | #[macro_use] 10 | extern crate bifrost_plugins; 11 | extern crate parking_lot; 12 | extern crate serde; 13 | #[macro_use] 14 | extern crate serde_derive; 15 | #[macro_use] 16 | extern crate log; 17 | extern crate env_logger; 18 | extern crate log4rs; 19 | extern crate serde_yaml; 20 | extern crate yaml_rust; 21 | 22 | mod apps; 23 | mod config; 24 | mod graph; 25 | mod job; 26 | mod query; 27 | mod server; 28 | #[cfg(test)] 29 | mod tests; 30 | mod traversal; 31 | 
mod utils; 32 | use std::thread; 33 | 34 | #[tokio::main(flavor = "multi_thread")] 35 | async fn main() { 36 | log4rs::init_file("config/log4rs.yaml", Default::default()).unwrap(); 37 | info!("Shisoft Morpheus is initializing..."); 38 | query::init().unwrap(); 39 | let config = config::options_from_file("config/server.yaml"); 40 | server::MorpheusServer::new(config).await.unwrap(); 41 | } 42 | -------------------------------------------------------------------------------- /src/query/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::graph::edge::Edge; 2 | use crate::graph::vertex::Vertex; 3 | use dovahkiin::expr; 4 | use dovahkiin::types::OwnedValue; 5 | use neb::dovahkiin::expr::interpreter::Interpreter; 6 | use neb::dovahkiin::expr::symbols::bindings::bind; 7 | use neb::dovahkiin::expr::symbols::utils::is_true; 8 | use neb::dovahkiin::expr::SExpr; 9 | use neb::dovahkiin::integrated::lisp::parse_to_sexpr; 10 | use neb::dovahkiin::types::Value; 11 | 12 | pub static VERTEX_SYMBOL: u64 = hash_ident!(vertex) as u64; 13 | pub static EDGE_SYMBOL: u64 = hash_ident!(edge) as u64; 14 | 15 | #[derive(Debug)] 16 | pub enum InitQueryError { 17 | CannotInitSymbols, 18 | } 19 | 20 | pub mod symbols; 21 | 22 | pub fn init() -> Result<(), InitQueryError> { 23 | symbols::init_symbols().map_err(|_| InitQueryError::CannotInitSymbols)?; 24 | Ok(()) 25 | } 26 | 27 | pub trait Expr { 28 | fn to_sexpr(&self) -> Result, String>; 29 | } 30 | 31 | impl Expr for String { 32 | fn to_sexpr(&self) -> Result, String> { 33 | parse_to_sexpr(&self) 34 | } 35 | } 36 | 37 | impl<'a> Expr for &'a str { 38 | fn to_sexpr(&self) -> Result, String> { 39 | parse_to_sexpr(self) 40 | } 41 | } 42 | 43 | impl<'a> Expr for &'a Vec> { 44 | fn to_sexpr(&self) -> Result, String> { 45 | return Ok(self.to_vec()); 46 | } 47 | } 48 | 49 | pub struct Tester<'a> { 50 | core: Interpreter<'a>, 51 | } 52 | 53 | fn prep_interp<'a>() -> Interpreter<'a> { 54 | 
Interpreter::new() 55 | } 56 | 57 | pub fn parse_optional_expr(expr: &Option) -> Result>, String> 58 | where 59 | E: Expr, 60 | { 61 | match expr { 62 | &Some(ref expr) => { 63 | let expr_owned = expr.clone(); 64 | Ok(Some(expr_owned.to_sexpr()?)) 65 | } 66 | &None => Ok(None), 67 | } 68 | } 69 | 70 | impl<'a> Tester<'a> { 71 | pub async fn eval_with_edge_and_vertex( 72 | sexpr: &Option>>, 73 | vertex: &Vertex, 74 | edge: &Edge, 75 | ) -> Result { 76 | let sexpr = sexpr.clone(); // TODO: Memory management 77 | let sexpr = if let Some(expr) = sexpr { 78 | expr 79 | } else { 80 | return Ok(true); 81 | }; 82 | let mut interp = prep_interp(); 83 | bind( 84 | interp.get_env(), 85 | VERTEX_SYMBOL, 86 | SExpr::Value(expr::Value::Owned(vertex.cell.data.clone())), 87 | ); 88 | bind( 89 | interp.get_env(), 90 | EDGE_SYMBOL, 91 | SExpr::Value(if let &Some(ref e) = edge.get_data().await { 92 | expr::Value::Owned(e.data.clone()) 93 | } else { 94 | expr::Value::Owned(OwnedValue::Null) 95 | }), 96 | ); 97 | Ok(is_true(&interp.eval(sexpr)?)) 98 | } 99 | 100 | pub fn eval_with_vertex(sexpr: &Option>, vertex: &Vertex) -> Result { 101 | let sexpr = sexpr.clone(); // TODO: Memory management 102 | let sexpr = if let Some(expr) = sexpr { 103 | expr 104 | } else { 105 | return Ok(true); 106 | }; 107 | let mut interp = prep_interp(); 108 | bind( 109 | interp.get_env(), 110 | VERTEX_SYMBOL, 111 | SExpr::Value(expr::Value::Owned(vertex.cell.data.clone())), 112 | ); 113 | Ok(is_true(&interp.eval(sexpr)?)) 114 | } 115 | 116 | pub async fn eval_with_edge( 117 | sexpr: &Option>>, 118 | edge: &Edge, 119 | ) -> Result { 120 | let sexpr = sexpr.clone(); // TODO: Memory management 121 | let sexpr = if let Some(expr) = sexpr { 122 | expr 123 | } else { 124 | return Ok(true); 125 | }; 126 | let mut interp = prep_interp(); 127 | bind( 128 | interp.get_env(), 129 | EDGE_SYMBOL, 130 | SExpr::Value(if let &Some(ref e) = edge.get_data().await { 131 | expr::Value::Owned(e.data.clone()) 132 | } else { 133 | 
expr::Value::Owned(OwnedValue::Null) 134 | }), 135 | ); 136 | Ok(is_true(&interp.eval(sexpr)?)) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/query/symbols/crud/cell.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::expr::interpreter::Environment; 2 | use neb::dovahkiin::expr::symbols::Symbol; 3 | use neb::dovahkiin::expr::SExpr; 4 | 5 | // (insert-cell "" (hashmap "" "" (hashmap "" ))) 6 | #[derive(Debug)] 7 | pub struct Insert {} 8 | impl Symbol for Insert { 9 | fn eval<'a>( 10 | &self, 11 | exprs: Vec>, 12 | env: &mut Environment<'a>, 13 | ) -> Result, String> { 14 | unimplemented!(); 15 | } 16 | fn is_macro(&self) -> bool { 17 | true 18 | } 19 | } 20 | 21 | // (select-cell "" ) 22 | // (select-cell "" (hashmap "" )) // until index is done 23 | #[derive(Debug)] 24 | pub struct Select {} 25 | impl Symbol for Select { 26 | fn eval<'a>( 27 | &self, 28 | exprs: Vec>, 29 | env: &mut Environment<'a>, 30 | ) -> Result, String> { 31 | unimplemented!(); 32 | } 33 | fn is_macro(&self) -> bool { 34 | true 35 | } 36 | } 37 | 38 | // (update-cell "" (hashmap ...)) 39 | // (update-cell "" (hashmap ...) 
(hashmap ...)) // until index is done 40 | #[derive(Debug)] 41 | pub struct Update {} 42 | impl Symbol for Update { 43 | fn eval<'a>( 44 | &self, 45 | exprs: Vec>, 46 | env: &mut Environment<'a>, 47 | ) -> Result, String> { 48 | unimplemented!(); 49 | } 50 | fn is_macro(&self) -> bool { 51 | true 52 | } 53 | } 54 | 55 | // (delete-cell "" ) 56 | // (delete-cell "" (hashmap ...)) // until index is done 57 | #[derive(Debug)] 58 | pub struct Delete {} 59 | impl Symbol for Delete { 60 | fn eval<'a>( 61 | &self, 62 | exprs: Vec>, 63 | env: &mut Environment<'a>, 64 | ) -> Result, String> { 65 | unimplemented!(); 66 | } 67 | fn is_macro(&self) -> bool { 68 | true 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/query/symbols/crud/edge.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::expr::interpreter::Environment; 2 | use neb::dovahkiin::expr::symbols::Symbol; 3 | use neb::dovahkiin::expr::SExpr; 4 | 5 | #[derive(Debug)] 6 | pub struct Insert {} 7 | impl Symbol for Insert { 8 | fn eval<'a>( 9 | &self, 10 | exprs: Vec>, 11 | env: &mut Environment<'a>, 12 | ) -> Result, String> { 13 | unimplemented!(); 14 | } 15 | fn is_macro(&self) -> bool { 16 | true 17 | } 18 | } 19 | 20 | #[derive(Debug)] 21 | pub struct Select {} 22 | impl Symbol for Select { 23 | fn eval<'a>( 24 | &self, 25 | exprs: Vec>, 26 | env: &mut Environment<'a>, 27 | ) -> Result, String> { 28 | unimplemented!(); 29 | } 30 | fn is_macro(&self) -> bool { 31 | true 32 | } 33 | } 34 | 35 | #[derive(Debug)] 36 | pub struct Update {} 37 | impl Symbol for Update { 38 | fn eval<'a>( 39 | &self, 40 | exprs: Vec>, 41 | env: &mut Environment<'a>, 42 | ) -> Result, String> { 43 | unimplemented!(); 44 | } 45 | fn is_macro(&self) -> bool { 46 | true 47 | } 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct Delete {} 52 | impl Symbol for Delete { 53 | fn eval<'a>( 54 | &self, 55 | exprs: Vec>, 56 | env: &mut 
Environment<'a>, 57 | ) -> Result, String> { 58 | unimplemented!(); 59 | } 60 | fn is_macro(&self) -> bool { 61 | true 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/query/symbols/crud/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cell; 2 | pub mod edge; 3 | pub mod vertex; 4 | -------------------------------------------------------------------------------- /src/query/symbols/crud/vertex.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::expr::interpreter::Environment; 2 | use neb::dovahkiin::expr::symbols::Symbol; 3 | use neb::dovahkiin::expr::SExpr; 4 | 5 | #[derive(Debug)] 6 | pub struct Insert {} 7 | impl Symbol for Insert { 8 | fn eval<'a>( 9 | &self, 10 | exprs: Vec>, 11 | env: &mut Environment<'a>, 12 | ) -> Result, String> { 13 | unimplemented!(); 14 | } 15 | fn is_macro(&self) -> bool { 16 | true 17 | } 18 | } 19 | 20 | #[derive(Debug)] 21 | pub struct Select {} 22 | impl Symbol for Select { 23 | fn eval<'a>( 24 | &self, 25 | exprs: Vec>, 26 | env: &mut Environment<'a>, 27 | ) -> Result, String> { 28 | unimplemented!(); 29 | } 30 | fn is_macro(&self) -> bool { 31 | true 32 | } 33 | } 34 | 35 | #[derive(Debug)] 36 | pub struct Update {} 37 | impl Symbol for Update { 38 | fn eval<'a>( 39 | &self, 40 | exprs: Vec>, 41 | env: &mut Environment<'a>, 42 | ) -> Result, String> { 43 | unimplemented!(); 44 | } 45 | fn is_macro(&self) -> bool { 46 | true 47 | } 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct Delete {} 52 | impl Symbol for Delete { 53 | fn eval<'a>( 54 | &self, 55 | exprs: Vec>, 56 | env: &mut Environment<'a>, 57 | ) -> Result, String> { 58 | unimplemented!(); 59 | } 60 | fn is_macro(&self) -> bool { 61 | true 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/query/symbols/mod.rs: 
-------------------------------------------------------------------------------- 1 | use neb::dovahkiin::expr::symbols::ISYMBOL_MAP; 2 | 3 | pub mod crud; 4 | 5 | pub fn init_symbols() -> Result<(), ()> { 6 | ISYMBOL_MAP.insert("insert-cell", crud::cell::Insert {})?; 7 | ISYMBOL_MAP.insert("insert-vertex", crud::vertex::Insert {})?; 8 | 9 | ISYMBOL_MAP.insert("select-cell", crud::cell::Select {})?; 10 | ISYMBOL_MAP.insert("select-vertex", crud::vertex::Select {})?; 11 | ISYMBOL_MAP.insert("select-edge", crud::edge::Select {})?; 12 | 13 | ISYMBOL_MAP.insert("update-cell", crud::cell::Update {})?; 14 | ISYMBOL_MAP.insert("update-vertex", crud::vertex::Update {})?; 15 | ISYMBOL_MAP.insert("update-edge", crud::edge::Update {})?; 16 | 17 | ISYMBOL_MAP.insert("delete-cell", crud::cell::Delete {})?; 18 | ISYMBOL_MAP.insert("delete-vertex", crud::vertex::Delete {})?; 19 | ISYMBOL_MAP.insert("delete-edge", crud::edge::Delete {})?; 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /src/server/general.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/server/mod.rs: -------------------------------------------------------------------------------- 1 | use bifrost::raft::state_machine::master::ExecError; 2 | use bifrost::rpc; 3 | use bifrost::tcp::STANDALONE_ADDRESS_STRING; 4 | use futures::prelude::*; 5 | use futures::{future, Future}; 6 | use neb::client::{AsyncClient as NebClient, NebClientError}; 7 | use neb::server::{NebServer, ServerError, ServerOptions as NebServerOptions}; 8 | use std::sync::Arc; 9 | 10 | use crate::apps::hnsw::coordinator::HNSWIndexService; 11 | use crate::apps::hnsw::partition::service::HNSW_PARTITION_SERVICE_ID; 12 | use crate::apps::hnsw::{HNSWPartitionService, VectorIndexer}; 13 | use crate::graph::GraphEngine; 14 | use crate::job::logger::JobLogger; 15 | 16 | pub mod 
general; 17 | pub mod schema; 18 | pub mod traversal; 19 | 20 | #[derive(Debug)] 21 | pub enum MorpheusServerError { 22 | ServerError(ServerError), 23 | ClientError(NebClientError), 24 | InitSchemaError(ExecError), 25 | } 26 | 27 | pub struct MorpheusServer { 28 | pub neb_server: Arc, 29 | pub neb_client: Arc, 30 | pub schema_container: Arc, 31 | pub graph: Arc, 32 | pub job_logger: Arc, 33 | } 34 | 35 | #[derive(Debug, Serialize, Deserialize)] 36 | pub struct MorphesOptions { 37 | pub server_addr: String, 38 | pub group_name: String, 39 | pub storage: NebServerOptions, 40 | pub meta_members: Vec, 41 | } 42 | 43 | impl MorpheusServer { 44 | pub async fn new(options: MorphesOptions) -> Result, MorpheusServerError> { 45 | let neb_opts = &options.storage; 46 | let group_name = &options.group_name; 47 | let neb_server = NebServer::new_from_opts(neb_opts, &options.server_addr, group_name).await; 48 | let neb_client = Arc::new( 49 | neb::client::AsyncClient::new( 50 | &neb_server.rpc, 51 | &neb_server.membership, 52 | &options.meta_members, 53 | group_name, 54 | ) 55 | .await 56 | .unwrap(), 57 | ); 58 | debug!("Initializing schemas"); 59 | schema::SchemaContainer::new_meta_service(group_name, &neb_server.raft_service).await; 60 | let schema_container = schema::SchemaContainer::new_client( 61 | group_name, 62 | &neb_client.raft_client, 63 | &neb_client, 64 | &neb_server.meta, 65 | ) 66 | .await 67 | .map_err(MorpheusServerError::InitSchemaError)?; 68 | debug!("Schema container initialized"); 69 | let graph = Arc::new( 70 | GraphEngine::new(&schema_container, &neb_client, &neb_server) 71 | .map_err(MorpheusServerError::InitSchemaError) 72 | .await?, 73 | ); 74 | let job_logger = Arc::new(JobLogger::with_capacity(64)); 75 | Ok(Arc::new(MorpheusServer { 76 | neb_server, 77 | neb_client, 78 | schema_container, 79 | graph, 80 | job_logger, 81 | })) 82 | } 83 | 84 | pub async fn init_hnsw_index_partition_service( 85 | &self, 86 | ) -> Result, String> { 87 | let service = 
HNSWPartitionService::new( 88 | self.neb_server.server_id, 89 | &self.neb_server.consh, 90 | &self.neb_server.raft_client, 91 | &self.neb_server.chunks, 92 | &self.graph, 93 | &self.job_logger, 94 | ) 95 | .await?; 96 | let service_ref = Arc::new(service); 97 | self.neb_server.rpc.register_service(&service_ref).await; 98 | Ok(service_ref) 99 | } 100 | 101 | pub async fn init_hnsw_index_service(&self) -> Result, String> { 102 | let service = HNSWIndexService::new( 103 | &self.graph, 104 | &self.neb_server.consh, 105 | &self.neb_server.raft_client, 106 | ) 107 | .await?; 108 | let service_ref = Arc::new(service); 109 | self.neb_server.rpc.register_service(&service_ref).await; 110 | VectorIndexer::new_and_set_core(&self.neb_server).await; 111 | Ok(service_ref) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/server/schema/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::graph::edge; 2 | use crate::graph::edge::{EdgeAttributes, EdgeType}; 3 | use crate::graph::fields::VERTEX_TEMPLATE; 4 | use crate::server::schema::sm::client::SMClient; 5 | use bifrost::raft::client::RaftClient; 6 | use bifrost::raft::state_machine::master::ExecError; 7 | use bifrost::raft::RaftService; 8 | use bifrost_hasher::hash_str; 9 | use dovahkiin::types::Type; 10 | use futures::{future, Future, FutureExt, TryFutureExt}; 11 | use lightning::map::{Map, PtrHashMap as LFHashMap}; 12 | use neb::client::AsyncClient as NebClient; 13 | use neb::ram::schema::{DelSchemaError, Field, NewSchemaError, Schema}; 14 | use neb::server::ServerMeta as NebServerMeta; 15 | use std::sync::Arc; 16 | 17 | mod sm; 18 | 19 | #[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)] 20 | pub enum GraphSchema { 21 | Unspecified, 22 | Vertex, 23 | Edge(EdgeAttributes), 24 | } 25 | 26 | #[derive(Serialize, Deserialize, Debug, Clone)] 27 | pub enum SchemaError { 28 | NebSchemaExecError(ExecError), 29 | 
MorpheusSchemaExecError(ExecError),
    NewNebSchemaVerificationError(NewSchemaError),
    DelNebSchemaVerificationError(DelSchemaError),
    SimpleEdgeShouldNotHaveSchema,
    SchemaTypeUnspecified,
}

/// Cluster-wide registry mapping Morpheus schema ids to their graph kind,
/// kept in sync across nodes through a raft state machine.
// NOTE(review): generic parameters in this section were stripped by the
// packing tool; they are reconstructed from usage — TODO confirm.
pub struct SchemaContainer {
    pub neb_client: Arc<NebClient>,
    // schema id -> graph kind, updated via the on_schema_added/deleted subscriptions below.
    map: Arc<LFHashMap<u32, GraphSchema>>,
    sm_client: Arc<SMClient>,
    // NOTE(review): field name looks like a typo for `neb_meta`; renaming would
    // touch call sites elsewhere, so it is kept as-is.
    neb_mata: Arc<NebServerMeta>,
}

/// Graph-level schema description layered on top of a neb cell schema.
#[derive(Clone, Debug)]
pub struct MorpheusSchema {
    pub id: u32,
    pub name: String,
    pub schema_type: GraphSchema,
    // assumes string key paths mirroring neb's `str_key_field` — TODO confirm
    pub key_field: Option<Vec<String>>,
    pub fields: Vec<Field>,
    pub is_dynamic: bool,
}

lazy_static! {
    pub static ref EMPTY_FIELDS: Vec<Field> = Vec::new();
}

impl MorpheusSchema {
    /// Primary constructor; every other `new_*` helper delegates here.
    pub fn new_with_id_and_type(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
        schema_type: GraphSchema,
    ) -> MorpheusSchema {
        MorpheusSchema {
            id,
            name: name.to_string(),
            key_field: key_field.cloned(),
            fields: fields.clone(),
            schema_type,
            is_dynamic,
        }
    }

    /// Schema with a known id and no graph kind attached yet.
    pub fn new_with_id(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id_and_type(
            id,
            name,
            key_field,
            fields,
            is_dynamic,
            GraphSchema::Unspecified,
        )
    }

    /// Vertex schema with a known id.
    pub fn new_vertex_with_id(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id_and_type(
            id,
            name,
            key_field,
            fields,
            is_dynamic,
            GraphSchema::Vertex,
        )
    }

    /// Edge schema with a known id and explicit edge attributes.
    pub fn new_edge_with_id(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        edge_attr: EdgeAttributes,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id_and_type(
            id,
            name,
            key_field,
            fields,
            is_dynamic,
            GraphSchema::Edge(edge_attr),
        )
    }

    /// Schema with id 0: the server allocates the real id on creation.
    pub fn new(
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id(0, name, key_field, fields, is_dynamic)
    }

    pub fn into_ref(self) -> Arc<MorpheusSchema> {
        Arc::new(self)
    }
}

/// Prepend the fixed template fields for the schema kind onto `body_fields`.
/// Simple (bodyless) edges must not carry user-defined fields.
pub fn cell_fields(
    schema_type: GraphSchema,
    mut body_fields: Vec<Field>,
) -> Result<Vec<Field>, SchemaError> {
    let mut all_fields = match schema_type {
        GraphSchema::Vertex => VERTEX_TEMPLATE.clone(),
        GraphSchema::Edge(edge_attr) => {
            if !edge_attr.has_body && !body_fields.is_empty() {
                return Err(SchemaError::SimpleEdgeShouldNotHaveSchema);
            }
            match edge_attr.edge_type {
                EdgeType::Directed => edge::directed::EDGE_TEMPLATE.clone(),
                EdgeType::Undirected => edge::undirectd::EDGE_TEMPLATE.clone(),
            }
        }
        GraphSchema::Unspecified => return Err(SchemaError::SchemaTypeUnspecified),
    };
    all_fields.append(&mut body_fields);
    Ok(all_fields)
}

/// Stable raft state-machine id for one server group's schema SM.
pub fn generate_sm_id(group: &str) -> u64 {
    hash_str(&format!("{}-{}", sm::DEFAULT_RAFT_PREFIX, group))
}

impl SchemaContainer {
    /// Register the schema state machine on the local raft service.
    pub async fn new_meta_service(group: &str, raft_service: &Arc<RaftService>) {
        let container_sm = sm::GraphSchemasSM::new(generate_sm_id(group), raft_service).await;
        raft_service
            .register_state_machine(Box::new(container_sm))
            .await;
    }

    /// Build a client-side container, seed its map from the state machine and
    /// subscribe to add/delete events so the local view stays in sync.
    pub async fn new_client(
        group: &str,
        raft_client: &Arc<RaftClient>,
        neb_client: &Arc<NebClient>,
        neb_meta: &Arc<NebServerMeta>,
    ) -> Result<Arc<SchemaContainer>, ExecError> {
        let sm_client = Arc::new(SMClient::new(generate_sm_id(group), &raft_client));
        let existing = sm_client.get_all().await?;
        let container_ref = Arc::new(SchemaContainer {
            map: Arc::new(LFHashMap::with_capacity(64)),
            sm_client: sm_client.clone(),
            neb_client: neb_client.clone(),
            neb_mata: neb_meta.clone(),
        });
        for (schema_id, schema_type) in existing {
            container_ref.map.insert(schema_id, schema_type);
        }
        let added_ref = container_ref.clone();
        let _r1 = sm_client
            .on_schema_added(move |(id, schema_type)| {
                added_ref.map.insert(id, schema_type);
                future::ready(()).boxed()
            })
            .await?
            .unwrap();
        let removed_ref = container_ref.clone();
        let _r2 = sm_client
            .on_schema_deleted(move |id| {
                removed_ref.map.remove(&id);
                future::ready(()).boxed()
            })
            .await?
            .unwrap();
        Ok(container_ref)
    }

    /// Create the backing neb schema, then record its graph kind in the raft
    /// state machine. Note that if the edge does not have a body, the schema
    /// will not be used to generate the edge cell.
    pub async fn new_schema(&self, schema: MorpheusSchema) -> Result<u32, SchemaError> {
        let schema_type = schema.schema_type;
        let schema_fields = cell_fields(schema_type, schema.fields.clone())?;
        let neb_schema = Schema::new_with_id(
            schema.id,
            &schema.name,
            schema.key_field.clone(),
            Field::new_schema(schema_fields),
            schema.is_dynamic,
            false,
        );
        let schema_id = if schema.id == 0 {
            // Let neb allocate a fresh id.
            self.neb_client
                .new_schema(neb_schema)
                .await
                .map_err(SchemaError::NebSchemaExecError)?
                .map_err(SchemaError::NewNebSchemaVerificationError)?
        } else {
            self.neb_client
                .new_schema_with_id(neb_schema)
                .await
                .map_err(SchemaError::NebSchemaExecError)?
                .map_err(SchemaError::NewNebSchemaVerificationError)?;
            schema.id
        };
        self.sm_client
            .new_schema(&schema_id, &schema_type)
            .await
            .map(|_| schema_id)
            .map_err(SchemaError::MorpheusSchemaExecError)
    }

    pub async fn del_schema(&self, schema_name: &String) -> Result<(), SchemaError> {
        self.neb_client
            .del_schema(schema_name.clone())
            .await
            .map_err(SchemaError::NebSchemaExecError)?
            .map_err(SchemaError::DelNebSchemaVerificationError)
    }

    pub fn schema_type(&self, schema_id: u32) -> Option<GraphSchema> {
        Self::schema_type_(&self.map, schema_id)
    }

    fn schema_type_(
        map: &Arc<LFHashMap<u32, GraphSchema>>,
        schema_id: u32,
    ) -> Option<GraphSchema> {
        map.get(&schema_id)
    }

    pub fn id_from_name(&self, name: &str) -> Option<u32> {
        self.neb_mata.schemas.name_to_id(name)
    }

    /// Look up a schema by name; an unknown name falls back to id 0, which is
    /// expected to resolve to nothing.
    pub fn from_name(&self, name: &str) -> Option<MorpheusSchema> {
        let schema_id = self.id_from_name(name).unwrap_or(0);
        self.get_neb_schema(schema_id)
            .and_then(|neb_schema| self.neb_to_morpheus_schema(&neb_schema))
    }

    pub fn get_neb_schema(&self, schema_id: u32) -> Option<Arc<Schema>> {
        self.neb_mata.schemas.get(&schema_id)
    }

    pub fn neb_to_morpheus_schema(&self, schema: &Arc<Schema>) -> Option<MorpheusSchema> {
        Self::neb_to_morpheus_schema_(&self.map, schema)
    }

    /// Convert a neb schema to a Morpheus schema; yields None when the graph
    /// kind is unknown or the neb schema has no sub-fields.
    fn neb_to_morpheus_schema_(
        schema_map: &Arc<LFHashMap<u32, GraphSchema>>,
        schema: &Arc<Schema>,
    ) -> Option<MorpheusSchema> {
        let schema_type = Self::schema_type_(schema_map, schema.id)?;
        let fields = schema.fields.sub_fields.as_ref()?;
        Some(MorpheusSchema {
            id: schema.id,
            name: schema.name.clone(),
            schema_type,
            key_field: schema.str_key_field.clone(),
            fields: fields.clone(),
            is_dynamic: schema.is_dynamic,
        })
    }

    /// All schemas known to neb that also carry a Morpheus graph kind.
    pub async fn all_morpheus_schemas(&self) -> Result<Vec<MorpheusSchema>, ExecError> {
        let schema_map = self.map.clone();
        self.neb_client
            .get_all_schema()
            .await
            .map(move |neb_schemas| {
                neb_schemas
                    .into_iter()
                    .filter_map(|schema| {
                        Self::neb_to_morpheus_schema_(&schema_map, &Arc::new(schema))
                    })
                    .collect()
            })
    }

    pub async fn count(&self) -> Result<usize, ExecError> {
        self.all_morpheus_schemas().await.map(|all| all.len())
    }
}

/// Anything resolvable to a numeric schema id.
pub trait ToSchemaId {
    fn to_id(&self, schemas: &Arc<SchemaContainer>) -> u32;
}

impl ToSchemaId for MorpheusSchema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl ToSchemaId for u32 {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        *self
    }
}

impl ToSchemaId for Schema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

// NOTE(review): the two Arc impls below had their inner types stripped by the
// packing tool; Arc<Schema> / Arc<MorpheusSchema> reconstructed — TODO confirm order.
impl ToSchemaId for Arc<Schema> {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl ToSchemaId for Arc<MorpheusSchema> {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl<'a> ToSchemaId for &'a MorpheusSchema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl<'a> ToSchemaId for &'a Schema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl<'a> ToSchemaId for &'a str {
    fn to_id(&self, schemas: &Arc<SchemaContainer>) -> u32 {
        schemas.id_from_name(self).unwrap_or(0)
    }
}

// ==== file: src/server/schema/sm.rs ====
use bifrost::raft::state_machine::callback::server::{NotifyError, SMCallback};
use bifrost::raft::state_machine::StateMachineCtl;
use bifrost::raft::RaftService;
use bifrost::*;
use bifrost_hasher::hash_str;
use std::collections::HashMap;
use
std::sync::Arc;

use super::GraphSchema;

pub static DEFAULT_RAFT_PREFIX: &'static str = "MORPHEUS_SCHEMA_RAFT_SM";

/// Raft state machine replicating the schema-id -> graph-kind map across the
/// cluster; queried/updated through the generated `StateMachineCmds` API.
// NOTE(review): generic parameters stripped by the packing tool are
// reconstructed from usage — TODO confirm against the original repository.
pub struct GraphSchemasSM {
    map: HashMap<u32, GraphSchema>,
    callback: SMCallback,
    sm_id: u64,
}

raft_state_machine! {
    def qry get_all() -> Vec<(u32, GraphSchema)>;
    def qry get(id: u32) -> Option<GraphSchema>;
    def cmd new_schema(id: u32, schema: GraphSchema) -> Result<(), NotifyError>;
    def cmd del_schema(id: u32) -> Result<(), NotifyError>;
    def sub on_schema_added() -> (u32, GraphSchema);
    def sub on_schema_deleted() -> u32;
}

impl StateMachineCmds for GraphSchemasSM {
    fn get_all<'a>(&'a self) -> BoxFuture<'a, Vec<(u32, GraphSchema)>> {
        future::ready(self.get_all_local()).boxed()
    }

    fn get<'a>(&'a self, id: u32) -> BoxFuture<'a, Option<GraphSchema>> {
        future::ready(self.get_local(id)).boxed()
    }

    /// Insert (or overwrite) the entry, then notify subscribers.
    fn new_schema<'a>(
        &'a mut self,
        id: u32,
        schema: GraphSchema,
    ) -> BoxFuture<'a, Result<(), NotifyError>> {
        self.map.insert(id, schema);
        async move {
            self.callback
                .notify(commands::on_schema_added::new(), (id, schema))
                .await?;
            Ok(())
        }
        .boxed()
    }

    /// Remove the entry and notify subscribers.
    ///
    /// FIX: this previously did `self.map.remove(&id).unwrap()`, which panicked
    /// — and took the raft state machine down — whenever the id was never
    /// registered or was already deleted (e.g. a replayed/duplicated command).
    /// A missing id is now a no-op, and the deletion event is only published
    /// when an entry was actually removed.
    fn del_schema<'a>(&'a mut self, id: u32) -> BoxFuture<'a, Result<(), NotifyError>> {
        let removed = self.map.remove(&id).is_some();
        async move {
            if removed {
                self.callback
                    .notify(commands::on_schema_deleted::new(), id)
                    .await?;
            }
            Ok(())
        }
        .boxed()
    }
}

impl StateMachineCtl for GraphSchemasSM {
    raft_sm_complete!();
    fn id(&self) -> u64 {
        self.sm_id
    }
    /// Serialize the whole map as a snapshot for raft log compaction.
    fn snapshot(&self) -> Option<Vec<u8>> {
        Some(utils::serde::serialize(
            &self.map.iter().collect::<Vec<_>>(),
        ))
    }
    /// Rebuild the map from a snapshot produced by `snapshot`.
    fn recover(&mut self, data: Vec<u8>) -> BoxFuture<()> {
        let schemas: Vec<(u32, GraphSchema)> = utils::serde::deserialize(&data).unwrap();
        for (k, v) in schemas {
            self.map.insert(k, v);
        }
        future::ready(()).boxed()
    }
}

impl GraphSchemasSM {
    pub async fn new<'a>(sm_id: u64,
raft_service: &Arc<RaftService>) -> Self {
        Self {
            callback: SMCallback::new(sm_id, raft_service.clone()).await,
            map: HashMap::with_capacity(64),
            sm_id,
        }
    }

    /// Snapshot of every (id, kind) pair currently in the map.
    fn get_all_local(&self) -> Vec<(u32, GraphSchema)> {
        self.map.iter().map(|(k, v)| (*k, v.clone())).collect()
    }

    fn get_local(&self, id: u32) -> Option<GraphSchema> {
        self.map.get(&id).map(|s| s.clone())
    }
}

// ==== file: src/server/traversal.rs (empty placeholder) ====

// ==== file: src/tests/mod.rs ====
use crate::config;
use crate::server::{MorpheusServer, MorpheusServerError};
use futures::Future;
use std::sync::Arc;

mod graph;

/// Boot a single-node Morpheus server on 127.0.0.1:<port>, with itself as the
/// only meta member; `group` names the cluster group.
// NOTE(review): return generics were stripped by the packing tool and are
// reconstructed here — TODO confirm.
pub fn start_server<'a>(
    port: u32,
    group: &'a str,
) -> impl Future<Output = Result<Arc<MorpheusServer>, MorpheusServerError>> {
    let addr: String = format!("127.0.0.1:{}", port);
    let mut config = config::options_from_file("config/test_server.yaml");
    config.meta_members = vec![addr.clone()];
    config.server_addr = addr;
    config.group_name = format!("{}", group);
    MorpheusServer::new(config)
}

/// Smoke test: a fresh single-node server should come up cleanly.
#[tokio::test]
pub async fn server_startup() {
    let _ = env_logger::try_init();
    start_server(4000, "bootstrap").await.unwrap();
}

// ==== file: src/traversal/bfs/coordinator.rs ====
use super::*;

/// Hands out task ids for BFS tasks coordinated by this server.
pub struct BFSCoordinator {
    pub server_id: u64,
    pub task_id_counter: AtomicU64,
}

impl BFSCoordinator {
    pub fn new(server_id: u64) -> Self {
        Self {
server_id,
            task_id_counter: AtomicU64::new(0),
        }
    }

    /// Allocate the next task id, unique per coordinating server.
    pub fn next_task_id(&self) -> TaskId {
        let task_id = self.task_id_counter.fetch_add(1, Ordering::SeqCst);
        TaskId {
            coordinator_server_id: self.server_id,
            coordinator_task_id: task_id,
        }
    }
}

// ==== file: src/traversal/bfs/engine.rs ====
use super::*;

/// Cluster-unique BFS task identifier.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct TaskId {
    pub coordinator_server_id: u64,
    pub coordinator_task_id: u64,
}

/// State shared between all node tasks on this server.
pub struct SharedEngine {
    pub graph: Arc<GraphEngine>,
    pub runtime: Runtime,
}

// NOTE(review): generic parameters in this section were stripped by the
// packing tool; reconstructed from usage — TODO confirm.
pub struct BFSEngine {
    pub server_id: u64,
    pub coordinator: BFSCoordinator,
    pub node_tasks: PtrHashMap<TaskId, Arc<BFSNodeTask>>,
    pub shared_engine: Arc<SharedEngine>,
}

impl BFSEngine {
    /// Build the engine with its own multi-threaded runtime whose workers are
    /// named "traversal-worker-N".
    pub fn new(server_id: u64, graph: &Arc<GraphEngine>) -> Self {
        let thread_counter: AtomicUsize = AtomicUsize::new(0);
        Self {
            server_id,
            coordinator: BFSCoordinator {
                server_id,
                task_id_counter: AtomicU64::new(0),
            },
            node_tasks: PtrHashMap::with_capacity(DS_CAPACITY),
            shared_engine: Arc::new(SharedEngine {
                graph: graph.clone(),
                runtime: runtime::Builder::new_multi_thread()
                    .enable_all()
                    .thread_name_fn(move || {
                        let counter = thread_counter.fetch_add(1, Ordering::SeqCst);
                        format!("traversal-worker-{}", counter)
                    })
                    .worker_threads(num_cpus::get())
                    .build()
                    .unwrap(),
            }),
        }
    }

    /// Create a node task seeded with `frontiers` and register it under a
    /// fresh task id.
    // NOTE(review): the generic bound was stripped by the packing tool;
    // `T: BFSTask` reconstructed — TODO confirm.
    pub fn create_task<T: BFSTask>(
        &self,
        task: &'static T,
        params_ptr: usize,
        frontiers: Vec<Id>,
    ) -> TaskId {
        let task_id = self.coordinator.next_task_id();
        let node = Arc::new(BFSNodeTask::new(
            task_id.coordinator_task_id,
            task,
            params_ptr,
            &self.shared_engine,
        ));
        for fid in frontiers {
            node.frontier.push_back(fid);
        }
        self.node_tasks.insert(task_id.clone(), node);
        task_id
    }

    pub fn get_graph_engine(&self) -> &Arc<GraphEngine> {
        &self.shared_engine.graph
    }

    /// One BSP superstep: drain the frontier, process each vertex and its
    /// edges on the shared runtime, then wait for every spawned unit.
    pub async fn step_node_task(&self, node: &Arc<BFSNodeTask>) {
        let mut pending = vec![];
        while let Some(id) = node.frontier.pop_front() {
            let vertex = self.shared_engine.graph.vertex_by(id).await;
            let (done_tx, done_rx) = oneshot::channel();
            let task = node.task;
            let node = node.clone();
            self.shared_engine.runtime.spawn(async move {
                if let Ok(Some(vertex)) = vertex {
                    let vertex_edges = task.process_vertex(&vertex, &*node).await;
                    // Now the step edges should be updated.
                    for (_opposite_vertex_id, edge) in vertex_edges {
                        task.process_edge(&vertex, &edge, &*node).await;
                    }
                }
                let _ = done_tx.send(());
            });
            pending.push(done_rx);
        }
        // BSP barrier: wait for all the vertices and their edges to be processed.
        for done in pending {
            let _ = done.await;
        }
    }
}

impl BFSNodeTask {
    fn new(
        task_id: u64,
        task: &'static dyn BFSTask,
        params_ptr: usize,
        shared_engine: &Arc<SharedEngine>,
    ) -> Self {
        Self {
            task_id,
            visited: PtrHashMap::with_capacity(DS_CAPACITY),
            cells_cache: PtrHashMap::with_capacity(DS_CAPACITY),
            cells_metas: PtrHashMap::with_capacity(DS_CAPACITY),
            next_hops: PtrHashMap::with_capacity(DS_CAPACITY),
            task,
            params_ptr,
            frontier: LinkedRingBufferList::new(),
            shared_engine: shared_engine.clone(),
        }
    }

    pub fn mark_visited(&mut self, id: Id) {
        self.visited.insert(id, ());
    }

    pub fn is_visited(&self, id: &Id) -> bool {
        self.visited.contains_key(id)
    }

    pub fn cache_cell(&mut self, cell: OwnedCell) {
        self.cells_cache.insert(cell.id(), cell);
    }

    // NOTE(review): the return generics were stripped by the packing tool;
    // reconstructed from `cells_cache.get_ref` — TODO confirm PtrRef generics.
    pub fn get_cached_cell(&self, id: &Id) -> Option<PtrRef<Id, OwnedCell>> {
        self.cells_cache.get_ref(id)
    }

    pub fn graph(&self) -> &Arc<GraphEngine> {
        &self.shared_engine.graph
    }
}

impl Drop for BFSNodeTask {
    fn drop(&mut self) {
        // Give the task implementation a chance to release per-node resources.
        self.task.dispose(self);
    }
}

// ==== file: src/traversal/bfs/mod.rs ====
// Still working in progress

use std::{
    future::Future,
    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
};

use dovahkiin::types::{Id, OwnedValue};
use futures::future::BoxFuture;
use lightning::{
    aarc::Arc,
    list::LinkedRingBufferList,
    map::{Map, PtrHashMap, PtrRef},
};
use neb::ram::cell::{Cell, OwnedCell};
use tokio::{
    runtime::{self, Runtime},
    sync::oneshot,
};

use crate::graph::{edge::Edge, vertex::Vertex, GraphEngine};
use rayon::*;

mod coordinator;
use coordinator::*;

mod engine;
use engine::*;

mod task;
use task::*;

// NOTE(review): these blanket Send/Sync assertions are author-asserted; the
// soundness of sharing BFSNodeTask across threads is not proven here — verify.
unsafe impl Send for BFSNodeTask {}
unsafe impl Sync for BFSNodeTask {}

const DS_CAPACITY: usize = 1024;

// ==== file: src/traversal/bfs/task.rs ====
use super::*;

/// Per-task BFS state held on one node.
// NOTE(review): map/list generics reconstructed from usage — TODO confirm.
pub struct BFSNodeTask {
    pub task_id: u64,
    pub visited: PtrHashMap<Id, ()>,
    pub cells_cache: PtrHashMap<Id, OwnedCell>,
    pub cells_metas: PtrHashMap<Id, OwnedValue>,
    pub next_hops: PtrHashMap<Id, Id>,
    pub task: &'static dyn BFSTask,
    pub params_ptr: usize,

    pub frontier: LinkedRingBufferList<Id>,

    pub shared_engine: Arc<SharedEngine>,
}

/// Application hook points invoked by the BFS engine.
pub trait BFSTask: Send + Sync {
    /// Returns the (opposite vertex id, edge) pairs discovered from `vertex`.
    fn process_vertex(&self, vertex: &Vertex, node: &BFSNodeTask)
        -> BoxFuture<'_, Vec<(Id, Edge)>>;
    fn process_edge(&self,
vertex: &Vertex, edge: &Edge, node: &BFSNodeTask) -> BoxFuture<'_, ()>;
    /// Called on drop of the node task to release task-owned resources.
    fn dispose(&self, node: &BFSNodeTask);
}

// ==== file: src/traversal/mod.rs ====
pub mod bfs;
pub mod navigation;

// ==== file: src/traversal/navigation/apps.rs ====
use super::*;

/// Applications that can drive a navigation traversal.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub enum App {
    Hnsw,
}

impl App {
    /// Resolve the app to its navigation task singleton.
    pub fn to_task(&self) -> &'static dyn NavigationTask {
        // HNSW task wiring is not hooked up yet.
        unimplemented!()
        // match self {
        //     App::Hnsw => &HnswTraversalTask,
        // }
    }
}

// ==== file: src/traversal/navigation/engine.rs ====
use std::{
    collections::{HashMap, HashSet},
    mem,
};

use async_std::sync::Mutex;
use bifrost::conshash::ConsistentHashing;

use crate::job::*;

use super::{
    server::{NodeFrontier, NodeStep},
    *,
};

/// Per-server navigation engine: hosts the coordinator plus the workers of
/// every job currently active on this node.
// NOTE(review): generic parameters in this section were stripped by the
// packing tool; reconstructed from usage — TODO confirm.
pub struct NavigationEngine {
    pub server_id: u64,
    pub coordinator: NavigationCoordinator,
    pub active_tasks: PtrHashMap<JobId, Arc<Mutex<NavigationWorker>>>,
    pub shared_engine: Arc<SharedEngine>,
}

// NOTE(review): author-asserted thread-safety; not proven here — verify.
unsafe impl Send for NavigationEngine {}
unsafe impl Sync for NavigationEngine {}

pub struct SharedEngine {
    pub graph: Arc<GraphEngine>,
    pub runtime: Runtime,
}

unsafe impl Send for SharedEngine {}
unsafe impl Sync for SharedEngine {}

impl NavigationEngine {
    /// Build the engine with its own multi-threaded runtime whose workers are
    /// named "navigation-worker-N".
    pub fn new(
        server_id: u64,
        graph: &Arc<GraphEngine>,
        conshash: &Arc<ConsistentHashing>,
    ) -> Self {
        let thread_counter: AtomicUsize = AtomicUsize::new(0);
        let runtime = runtime::Builder::new_multi_thread()
            .enable_all()
            .thread_name_fn(move || {
                let counter = thread_counter.fetch_add(1, Ordering::SeqCst);
                format!("navigation-worker-{}", counter)
            })
            .worker_threads(num_cpus::get())
            .build()
            .unwrap();
        let shared_engine = Arc::new(SharedEngine {
            graph: graph.clone(),
            runtime,
        });
        Self {
            server_id,
            coordinator: NavigationCoordinator::new(server_id, conshash, &shared_engine),
            active_tasks: PtrHashMap::with_capacity(DS_CAPACITY),
            shared_engine: shared_engine.clone(),
        }
    }

    // pub fn create_navigation(
    //     &self,
    //     task: &'static dyn NavigationTask,
    //     params_ptr: usize,
    // ) -> JobId {
    //     let job_id = self.coordinator.next_job_id();
    //     let initial_frontier = task.initial_frontier(params_ptr, &self.shared_engine.graph);
    //     let node = self.new_node(job_id, task, params_ptr, initial_frontier);
    //     job_id
    // }

    /// Register a worker for `job_id` seeded with `initial_frontier`.
    pub fn new_worker(
        &self,
        job_id: JobId,
        task: &'static dyn NavigationTask,
        params_ptr: usize,
        initial_frontier: Vec<NodeFrontierItem>,
    ) {
        let worker = Arc::new(Mutex::new(NavigationWorker::new(
            job_id,
            task,
            params_ptr,
            initial_frontier,
            &self.shared_engine,
        )));
        self.active_tasks.insert(job_id, worker.clone());
    }

    /// Run one BSP navigation step for a worker:
    /// 1. expand every frontier item, measuring candidate vertices in batches;
    /// 2. run a final selection over the per-batch winners;
    /// 3. navigate from each selected vertex and stage the resulting frontier
    ///    candidates for the coordinator to fetch and distribute.
    pub async fn worker_navigate_step(
        &self,
        node: &mut NavigationWorker,
    ) -> HashMap<Id, NavigationResult> {
        let batch_size = node.task.batch_size(node);
        let mut selected_pool = Vec::with_capacity(batch_size);
        let mut batch = Vec::with_capacity(batch_size);
        let engine = &self.shared_engine.graph;
        while let Some(mut frontier_item) = node.current_frontier.pop() {
            while let Some(opposite_id) = frontier_item.opposite_ids.pop() {
                // Missing or unreadable vertices are silently skipped.
                if let Ok(Some(vertex)) = self.shared_engine.graph.vertex_by(opposite_id).await {
                    batch.push(vertex);
                }
                if batch.len() == batch_size {
                    let measurements = match node.task.measure_vertices(engine, batch, node) {
                        Ok(measurements) => measurements,
                        Err(e) => {
                            // Report the failure under the unit id sentinel.
                            return HashMap::from([(Id::unit_id(), NavigationResult::Error(e))])
                        }
                    };
                    let candidate = select_measured_vertex(measurements, node);
                    selected_pool.extend(
                        candidate
                            .into_iter()
                            .filter(|(v, _)| !v.cell.id().is_unit_id()),
                    );
                    batch = Vec::with_capacity(batch_size);
                }
            }
        }
        // Flush the final, partially filled batch.
        if !batch.is_empty() {
            let measurements = match node.task.measure_vertices(engine, batch, node) {
                Ok(measurements) => measurements,
                Err(e) => return HashMap::from([(Id::unit_id(), NavigationResult::Error(e))]),
            };
            let candidate = select_measured_vertex(measurements, node);
            selected_pool.extend(
                candidate
                    .into_iter()
                    .filter(|(v, _)| !v.cell.id().is_unit_id()),
            );
        }
        let selected = if selected_pool.len() > 1 {
            select_measured_vertex(selected_pool, node)
        } else if selected_pool.len() == 1 {
            selected_pool
        } else {
            // Nothing survived measurement: the navigation has converged.
            node.frontier_candidates.clear();
            return HashMap::new();
        };
        let mut frontier_candidates = Vec::new();
        let mut all_results = HashMap::new();
        for (vertex, distance) in selected {
            let vertex_id = vertex.cell.id();
            let (result, opposite_ids) = node.task.navigate_vertex(vertex, distance, node).await;
            debug_assert!(distance >= 0.0); // Should not have negative distance
            frontier_candidates.push(NodeFrontierItem {
                vertex_id,
                distance,
                opposite_ids,
            });
            all_results.insert(vertex_id, result);
        }
        // Save the opposite ids for the coordinator to fetch and distribute.
        node.frontier_candidates = frontier_candidates;
        all_results
    }
}

/// Let the task pick the surviving (vertex, distance) pairs out of
/// `measurements`; 0 or 1 entries pass through unchanged.
fn select_measured_vertex(
    measurements: Vec<(Vertex, Distance)>,
    node: &NavigationWorker,
) -> Vec<(Vertex, Distance)> {
    // Avoid any extra allocation for the trivial cases.
    if measurements.len() <= 1 {
        return measurements;
    }

    // Build (id, distance) pairs for the task plus an id-keyed vertex list in
    // a single pass, pre-sized to avoid resizing.
    let mut indexed_measurements = Vec::with_capacity(measurements.len());
    let mut vertex_map = Vec::with_capacity(measurements.len());
    for (vertex, distance) in measurements {
        let vid = vertex.cell.id();
        indexed_measurements.push((vid, distance));
        vertex_map.push((vid, (vertex, distance)));
    }

    let selected_ids = node
        .task
        .select_measured(indexed_measurements, node.params_ptr);
    // HashSet for O(1) membership checks while filtering.
    let selected_set: std::collections::HashSet<_> =
        selected_ids.into_iter().map(|(vid, _)| vid).collect();

    vertex_map
        .into_iter()
        .filter(|(vid, _)| selected_set.contains(vid))
        .map(|(_, vertex_data)| vertex_data)
        .collect()
}

// ==== file: src/traversal/navigation/job.rs ====
use super::*;

#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct JobId {
    pub coordinator_server_id: u64,
    pub coordinator_job_id: u64,
}

// NOTE(review): channel item generics were stripped by the packing tool;
// reconstructed from the surrounding step/frontier types — TODO confirm.
pub struct NavigationJob {
    pub job_id: JobId,
    pub frontier_rx: Receiver<NodeFrontier>,
    pub step_tx: Sender<NavigationStep>,
}

pub struct NextStep {
    pub vertex_id: Id,
    pub distance: f64,
    pub opposite_ids: Vec<Id>,
    pub metadata: Vec<u8>,
}

pub enum NavigationStep {
    NextStep(NextStep),
    Terminate,
    Finished,
}

// ==== file: src/traversal/navigation/mod.rs ====
use std::{
    future::Future,
    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
};

use dovahkiin::types::Id;
use futures::future::BoxFuture;
use lightning::map::{Map, PtrHashMap};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::runtime::{self, Runtime};

use crate::{
    // apps::hnsw::navigate_task::HnswTraversalTask,
    graph::{vertex::Vertex, GraphEngine},
};
use tokio::sync::mpsc::*;

pub type Distance = f32;

pub mod apps;
pub mod coordinator;
pub mod engine;
pub mod server;
pub mod task;
pub mod worker;

pub use apps::*;
pub use coordinator::*;
pub use engine::*;
pub use server::*;
pub use task::*;
pub use worker::*;

const DS_CAPACITY: usize = 1024;

/// One frontier entry: a measured vertex plus the neighbor ids to expand next.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeFrontierItem {
    pub vertex_id: Id,
    pub distance: Distance,
    pub opposite_ids: Vec<Id>,
}

// ==== file: src/traversal/navigation/server.rs ====
use std::collections::HashMap;

use crate::{
    apps::hnsw,
    job::{JobId, JobReport},
};

use super::*;
use bifrost::{
    conshash::ConsistentHashing, dispatch_rpc_service_functions, rpc::*, service, service_with_id,
};

/// Frontier snapshot a worker hands back to the coordinator, together with
/// task-encoded state updates.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct NodeFrontier {
    pub items: Vec<NodeFrontierItem>,
    pub metadata: Vec<u8>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub
enum NodeStep { 21 | Step(Distance), 22 | Terminate, 23 | Finished, 24 | } 25 | 26 | #[derive(Debug, Clone, Serialize, Deserialize)] 27 | pub enum NavigationResult { 28 | Continue(Distance), 29 | Found, 30 | NotFound, 31 | Error(String), 32 | NA, 33 | } 34 | 35 | pub static DEFAULT_SERVICE_ID: u64 = hash_ident!(MORPHEUS_NAVIGATION_RPC_SERVICE) as u64; 36 | 37 | service! { 38 | // For coordinator 39 | rpc new_job(app: App, config: Vec) -> Result; 40 | rpc start_job(job_id: JobId) -> Result<(), String>; 41 | rpc job_report(job_id: JobId) -> Result; 42 | rpc stop_job(job_id: JobId) -> Result<(), String>; 43 | // For nodes 44 | rpc worker_new_task(job_id: JobId, app: App, initial_frontier: Vec, config: Vec); 45 | rpc worker_step(job_id: JobId) -> HashMap; 46 | rpc worker_frontier(job_id: JobId) -> NodeFrontier; 47 | rpc worker_new_frontier(job_id: JobId, frontier: NodeFrontier); 48 | rpc worker_terminate(job_id: JobId); 49 | rpc worker_findings(job_id: JobId) -> Result, String>; 50 | } 51 | 52 | pub struct NavigationService { 53 | engine: Arc, 54 | } 55 | 56 | impl Service for NavigationService { 57 | fn new_job<'a>( 58 | &'a self, 59 | app: App, 60 | config: Vec, 61 | ) -> futures::future::BoxFuture<'a, Result> { 62 | async move { 63 | let job_id = self.engine.coordinator.create_job(app, config); 64 | self.engine.coordinator.initialize_job(job_id).await?; 65 | return Ok(job_id); 66 | } 67 | .boxed() 68 | } 69 | 70 | fn start_job<'a>( 71 | &'a self, 72 | job_id: JobId, 73 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 74 | async move { 75 | // Get the params pointer from the active task, cannot get it from the coordinator 76 | let params_ptr = self 77 | .engine 78 | .active_tasks 79 | .get(&job_id) 80 | .unwrap() 81 | .lock() 82 | .await 83 | .params_ptr; 84 | self.engine.coordinator.start_job(job_id, params_ptr).await 85 | } 86 | .boxed() 87 | } 88 | 89 | fn job_report<'a>( 90 | &'a self, 91 | job_id: JobId, 92 | ) -> ::futures::future::BoxFuture<'a, 
Result> { 93 | self.engine.coordinator.job_report(job_id).boxed() 94 | } 95 | 96 | fn stop_job<'a>( 97 | &'a self, 98 | job_id: JobId, 99 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 100 | self.engine.coordinator.stop_job(job_id).boxed() 101 | } 102 | //////////////////////////////////////////////////////////////// 103 | 104 | fn worker_new_task<'a>( 105 | &'a self, 106 | job_id: JobId, 107 | app: App, 108 | initial_frontier: Vec, 109 | config: Vec, 110 | ) -> futures::future::BoxFuture<'a, ()> { 111 | let task = app.to_task(); 112 | let params_ptr = task.params_from_config(&config); 113 | let initial_frontier = vec![NodeFrontierItem { 114 | vertex_id: Id::unit_id(), 115 | distance: 0.0, 116 | opposite_ids: initial_frontier, 117 | }]; 118 | self.engine 119 | .new_worker(job_id, task, params_ptr, initial_frontier); 120 | Box::pin(future::ready(())) 121 | } 122 | 123 | fn worker_step<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, HashMap> { 124 | async move { 125 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 126 | let mut workder_guard = worker.lock().await; 127 | let result = self.engine.worker_navigate_step(&mut workder_guard).await; 128 | return result; 129 | } 130 | .boxed() 131 | } 132 | 133 | fn worker_frontier<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, NodeFrontier> { 134 | async move { 135 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 136 | let worker_guard = worker.lock().await; 137 | let frontier_items = worker_guard.frontier_candidates.clone(); 138 | let updates = worker_guard.task.encode_states(&*worker_guard); 139 | return NodeFrontier { 140 | items: frontier_items, 141 | metadata: updates, 142 | }; 143 | } 144 | .boxed() 145 | } 146 | 147 | fn worker_new_frontier<'a>( 148 | &'a self, 149 | job_id: JobId, 150 | frontier: NodeFrontier, 151 | ) -> BoxFuture<'a, ()> { 152 | async move { 153 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 154 | let mut worker_guard = worker.lock().await; 155 
| worker_guard.current_frontier = frontier.items; 156 | worker_guard 157 | .task 158 | .update_states(&*worker_guard, &frontier.metadata); 159 | } 160 | .boxed() 161 | } 162 | 163 | fn worker_terminate<'a>(&'a self, job_id: JobId) -> ::futures::future::BoxFuture<'a, ()> { 164 | async move { 165 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 166 | let worker_guard = worker.lock().await; 167 | worker_guard.task.cleanup(&*worker_guard); 168 | self.engine.active_tasks.remove(&job_id); 169 | } 170 | .boxed() 171 | } 172 | 173 | fn worker_findings<'a>( 174 | &'a self, 175 | job_id: JobId, 176 | ) -> ::futures::future::BoxFuture<'a, Result, String>> { 177 | async move { 178 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 179 | let worker_guard = worker.lock().await; 180 | return worker_guard.task.findings(&*worker_guard).await; 181 | } 182 | .boxed() 183 | } 184 | } 185 | 186 | impl NavigationService { 187 | pub fn new( 188 | server_id: u64, 189 | graph: &Arc, 190 | conshash: &Arc, 191 | ) -> Arc { 192 | let engine = Arc::new(NavigationEngine::new(server_id, graph, conshash)); 193 | Arc::new(Self { engine }) 194 | } 195 | } 196 | 197 | dispatch_rpc_service_functions!(NavigationService); 198 | service_with_id!(NavigationService, DEFAULT_SERVICE_ID); 199 | -------------------------------------------------------------------------------- /src/traversal/navigation/task.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | pub trait NavigationTask: Send + Sync { 4 | fn measure_vertices<'a>( 5 | &'a self, 6 | engine: &Arc, 7 | vertices: Vec, 8 | node: &'a NavigationWorker, 9 | ) -> Result, String>; 10 | fn select_measured<'a>( 11 | &'a self, 12 | vertices: Vec<(Id, Distance)>, 13 | params_ptr: usize, 14 | ) -> Vec<(Id, Distance)>; 15 | fn navigate_vertex<'a>( 16 | &'a self, 17 | vertex: Vertex, 18 | distance: Distance, 19 | node: &'a NavigationWorker, 20 | ) -> BoxFuture<'a, 
(NavigationResult, Vec)>; 21 | fn cleanup(&self, node: &NavigationWorker); 22 | fn batch_size(&self, node: &NavigationWorker) -> usize; 23 | fn encode_states(&self, node: &NavigationWorker) -> Vec; 24 | fn update_states(&self, node: &NavigationWorker, states: &[u8]); 25 | fn params_from_config(&self, config: &Vec) -> usize; 26 | fn initial_frontier<'a>( 27 | &'a self, 28 | params_ptr: usize, 29 | graph: &'a Arc, 30 | ) -> BoxFuture<'a, Result, String>>; 31 | fn dispose(&self, node: &NavigationWorker); 32 | fn findings<'a>(&'a self, node: &'a NavigationWorker) 33 | -> BoxFuture<'a, Result, String>>; 34 | fn finallalize_findings( 35 | &self, 36 | params_ptr: usize, 37 | findings: &Vec<&Vec>, 38 | ) -> BoxFuture>; // on the coordinator 39 | fn aggregate_metadata(&self, params_ptr: usize, metadata: &Vec<&Vec>) 40 | -> BoxFuture>; // on the coordinator 41 | } 42 | -------------------------------------------------------------------------------- /src/traversal/navigation/worker.rs: -------------------------------------------------------------------------------- 1 | use crate::job::JobId; 2 | 3 | use super::*; 4 | 5 | pub struct NavigationWorker { 6 | pub job_id: JobId, 7 | pub current_frontier: Vec, 8 | pub frontier_candidates: Vec, 9 | pub task: &'static dyn NavigationTask, 10 | pub params_ptr: usize, 11 | shared_engine: Arc, 12 | } 13 | 14 | impl NavigationWorker { 15 | pub fn new( 16 | job_id: JobId, 17 | task: &'static dyn NavigationTask, 18 | params_ptr: usize, 19 | initial_frontier: Vec, 20 | shared_engine: &Arc, 21 | ) -> Self { 22 | Self { 23 | job_id, 24 | task, 25 | params_ptr, 26 | current_frontier: initial_frontier, 27 | frontier_candidates: vec![], 28 | shared_engine: shared_engine.clone(), 29 | } 30 | } 31 | 32 | pub fn graph(&self) -> &Arc { 33 | &self.shared_engine.graph 34 | } 35 | } 36 | 37 | impl Drop for NavigationWorker { 38 | fn drop(&mut self) { 39 | self.task.dispose(self); 40 | } 41 | } 42 | 43 | unsafe impl Send for NavigationWorker {} 44 | 
unsafe impl Sync for NavigationWorker {} 45 | -------------------------------------------------------------------------------- /src/utils/bloom_filter.rs: -------------------------------------------------------------------------------- 1 | use ahash::RandomState; 2 | use serde::{Deserialize, Serialize}; 3 | use std::hash::{BuildHasher, Hash, Hasher}; 4 | 5 | /// Seeds for the two hash functions used in the Bloom filter. 6 | /// These are arbitrary but fixed values to ensure consistent hashing. 7 | const HASH_SEED_1: usize = 0x1234_5678_9ABC_DEF0; 8 | const HASH_SEED_2: usize = 0xFEDC_BA98_7654_3210; 9 | 10 | /// A simple Bloom filter implementation. 11 | /// 12 | /// A Bloom filter is a space-efficient probabilistic data structure that is used to test 13 | /// whether an element is a member of a set. False positives are possible, but false negatives are not. 14 | /// 15 | /// This implementation uses ahash for fast hashing and bitwise operations for efficient storage. 16 | /// The size is always rounded up to the next power of 2 for efficient bit operations. 17 | #[derive(Clone, Debug, Serialize, Deserialize)] 18 | pub struct BloomFilter { 19 | bits: Vec, 20 | num_hashes: usize, 21 | size: usize, 22 | size_mask: usize, // Mask for bit operations instead of modulo 23 | } 24 | 25 | impl BloomFilter { 26 | /// Creates a new Bloom filter with the specified size (in bytes) and number of hash functions. 27 | /// Size will be rounded up to the next power of 2 for efficient operations. 
28 | pub fn new(size_in_bytes: usize, num_hashes: usize) -> Self { 29 | // Round up to the next power of 2 30 | let size_in_bytes = Self::next_power_of_two(size_in_bytes); 31 | let size = size_in_bytes * 8; // Convert bytes to bits 32 | 33 | BloomFilter { 34 | bits: vec![0; size_in_bytes], 35 | num_hashes, 36 | size, 37 | size_mask: size - 1, // For efficient modulo with bitwise AND 38 | } 39 | } 40 | 41 | /// Creates a new Bloom filter with optimal size and hash count for the expected number of elements 42 | /// and desired false positive probability. 43 | pub fn with_rate(expected_elements: usize, false_positive_rate: f64) -> Self { 44 | // Calculate optimal size (in bits) 45 | let mut size = Self::optimal_size(expected_elements, false_positive_rate); 46 | // Round up to the next power of 2 47 | size = Self::next_power_of_two(size / 8) * 8; 48 | 49 | // Calculate optimal number of hash functions 50 | let num_hashes = Self::optimal_hashes(size, expected_elements); 51 | 52 | BloomFilter { 53 | bits: vec![0; size >> 3], // Convert bits to bytes with right shift 54 | num_hashes, 55 | size, 56 | size_mask: size - 1, 57 | } 58 | } 59 | 60 | /// Rounds up to the next power of 2 61 | fn next_power_of_two(n: usize) -> usize { 62 | n.next_power_of_two() 63 | } 64 | 65 | /// Calculates the optimal size in bits for the given parameters 66 | fn optimal_size(expected_elements: usize, false_positive_rate: f64) -> usize { 67 | let size = 68 | -((expected_elements as f64) * false_positive_rate.ln()) / (2.0_f64.ln().powi(2)); 69 | size.ceil() as usize 70 | } 71 | 72 | /// Calculates the optimal number of hash functions for the given parameters 73 | fn optimal_hashes(size: usize, expected_elements: usize) -> usize { 74 | let hashes = (size as f64 / expected_elements as f64) * 2.0_f64.ln(); 75 | hashes.ceil() as usize 76 | } 77 | 78 | /// Inserts an element into the Bloom filter. 
79 | pub fn insert(&mut self, item: &T) { 80 | // Use two different hash builders with different seeds 81 | let hash_builder1 = RandomState::with_seed(HASH_SEED_1); 82 | let hash_builder2 = RandomState::with_seed(HASH_SEED_2); 83 | 84 | let mut hasher1 = hash_builder1.build_hasher(); 85 | let mut hasher2 = hash_builder2.build_hasher(); 86 | 87 | item.hash(&mut hasher1); 88 | item.hash(&mut hasher2); 89 | 90 | let hash1 = hasher1.finish(); 91 | let hash2 = hasher2.finish(); 92 | 93 | for i in 0..self.num_hashes { 94 | // Use double hashing to generate multiple hash values 95 | let combined_hash = hash1.wrapping_add(i as u64).wrapping_mul(hash2); 96 | let index = (combined_hash as usize) & self.size_mask; 97 | let byte_index = index >> 3; // Equivalent to index / 8 98 | let bit_index = index & 0x7; // Equivalent to index % 8 99 | self.bits[byte_index] |= 1 << bit_index; 100 | } 101 | } 102 | 103 | /// Checks if an element might be in the Bloom filter. 104 | /// Returns true if the element might be in the set, false if it definitely is not. 
105 | pub fn contains(&self, item: &T) -> bool { 106 | // Use the same hash builders as in insert 107 | let hash_builder1 = RandomState::with_seed(HASH_SEED_1); 108 | let hash_builder2 = RandomState::with_seed(HASH_SEED_2); 109 | 110 | let mut hasher1 = hash_builder1.build_hasher(); 111 | let mut hasher2 = hash_builder2.build_hasher(); 112 | 113 | item.hash(&mut hasher1); 114 | item.hash(&mut hasher2); 115 | 116 | let hash1 = hasher1.finish(); 117 | let hash2 = hasher2.finish(); 118 | 119 | for i in 0..self.num_hashes { 120 | // Use double hashing to generate multiple hash values 121 | let combined_hash = hash1.wrapping_add(i as u64).wrapping_mul(hash2); 122 | let index = (combined_hash as usize) & self.size_mask; 123 | let byte_index = index >> 3; // Equivalent to index / 8 124 | let bit_index = index & 0x7; // Equivalent to index % 8 125 | if (self.bits[byte_index] & (1 << bit_index)) == 0 { 126 | return false; 127 | } 128 | } 129 | true 130 | } 131 | 132 | /// Clears the Bloom filter, removing all elements. 133 | pub fn clear(&mut self) { 134 | self.bits.fill(0); 135 | } 136 | 137 | /// Returns the approximate number of elements in the Bloom filter. 
138 | pub fn approximate_count(&self) -> usize { 139 | let m = self.size as f64; 140 | let k = self.num_hashes as f64; 141 | let x = self.count_set_bits() as f64; 142 | 143 | let estimate = -(m / k) * (1.0 - x / m).ln(); 144 | estimate.round() as usize 145 | } 146 | 147 | /// Counts the number of bits set to 1 in the filter 148 | fn count_set_bits(&self) -> usize { 149 | self.bits 150 | .iter() 151 | .map(|&byte| byte.count_ones() as usize) 152 | .sum() 153 | } 154 | 155 | /// Returns the size of the Bloom filter in bits 156 | pub fn size(&self) -> usize { 157 | self.size 158 | } 159 | 160 | /// Returns the number of hash functions used 161 | pub fn num_hashes(&self) -> usize { 162 | self.num_hashes 163 | } 164 | 165 | /// Serializes the Bloom filter to a byte vector efficiently 166 | pub fn to_bytes(&self) -> Vec { 167 | bincode::serialize(self).unwrap_or_default() 168 | } 169 | 170 | /// Deserializes a Bloom filter from a byte vector 171 | pub fn from_bytes(bytes: &[u8]) -> Result { 172 | bincode::deserialize(bytes) 173 | } 174 | } 175 | 176 | #[cfg(test)] 177 | mod tests { 178 | use super::*; 179 | 180 | #[test] 181 | fn test_bloom_filter_basic() { 182 | let mut filter = BloomFilter::new(128, 3); 183 | 184 | // Insert some elements 185 | filter.insert(&"apple"); 186 | filter.insert(&"banana"); 187 | filter.insert(&"cherry"); 188 | 189 | // These should be found 190 | assert!(filter.contains(&"apple")); 191 | assert!(filter.contains(&"banana")); 192 | assert!(filter.contains(&"cherry")); 193 | 194 | // This should not be found 195 | assert!(!filter.contains(&"durian")); 196 | 197 | // Clear the filter 198 | filter.clear(); 199 | assert!(!filter.contains(&"apple")); 200 | } 201 | 202 | #[test] 203 | fn test_bloom_filter_with_rate() { 204 | let filter = BloomFilter::with_rate(1000, 0.01); 205 | assert!(filter.size() > 0); 206 | assert!(filter.num_hashes() > 0); 207 | 208 | // Verify the size is a power of 2 209 | assert_eq!(filter.size() & (filter.size() - 1), 0); 
210 | } 211 | 212 | #[test] 213 | fn test_bloom_filter_serialization() { 214 | let mut filter = BloomFilter::new(128, 3); 215 | filter.insert(&"apple"); 216 | filter.insert(&"banana"); 217 | 218 | let bytes = filter.to_bytes(); 219 | let deserialized = BloomFilter::from_bytes(&bytes).unwrap(); 220 | 221 | assert!(deserialized.contains(&"apple")); 222 | assert!(deserialized.contains(&"banana")); 223 | assert!(!deserialized.contains(&"cherry")); 224 | } 225 | 226 | #[test] 227 | fn test_bloom_filter_approximate_count() { 228 | let mut filter = BloomFilter::new(128, 3); 229 | 230 | // Insert 100 elements 231 | for i in 0..100 { 232 | filter.insert(&i); 233 | } 234 | 235 | let count = filter.approximate_count(); 236 | // The count should be reasonably close to 100 237 | assert!(count >= 90 && count <= 110); // Allow for more variance 238 | } 239 | 240 | #[test] 241 | fn test_bloom_filter_false_positives() { 242 | let mut filter = BloomFilter::new(128, 3); 243 | 244 | // Insert some elements 245 | for i in 0..50 { 246 | filter.insert(&i); 247 | } 248 | 249 | // Check for non-existent elements 250 | let mut false_positives = 0; 251 | for i in 100..200 { 252 | if filter.contains(&i) { 253 | false_positives += 1; 254 | } 255 | } 256 | 257 | // False positive rate should be reasonable 258 | assert!(false_positives < 5); // Less than 5% false positives 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /src/utils/file.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io; 3 | use std::io::prelude::*; 4 | 5 | pub fn slurp<'a>(file: &'a str) -> io::Result { 6 | let mut file = File::open(file)?; 7 | let mut contents = String::new(); 8 | file.read_to_string(&mut contents)?; 9 | return Ok(contents); 10 | } 11 | -------------------------------------------------------------------------------- /src/utils/mod.rs: 
use std::mem::MaybeUninit;
use std::ops::{Index, IndexMut};

/// A fixed-size ring buffer that overwrites the oldest elements when full.
///
/// The capacity is specified at creation time and must be a power of 2 so
/// that wrap-around indexing can use a bit mask instead of a modulo.
pub struct RingBuffer<T> {
    /// Backing storage; slots outside the live window are uninitialized.
    buffer: Vec<MaybeUninit<T>>,
    /// Position where the next element will be written.
    write_pos: usize,
    /// Number of initialized (live) elements.
    count: usize,
    /// `capacity - 1`, for efficient wrap-around (`& mask`).
    mask: usize,
}

impl<T> RingBuffer<T> {
    /// Creates a new empty ring buffer with the specified capacity.
    ///
    /// # Panics
    /// Panics if `cap` is not a power of 2.
    pub fn new(cap: usize) -> Self {
        assert!(cap.is_power_of_two(), "Buffer size must be a power of 2");

        // Safe construction: `MaybeUninit::uninit()` slots need no `unsafe`
        // (the original reached for `Vec::set_len` unnecessarily).
        let buffer: Vec<MaybeUninit<T>> = (0..cap).map(|_| MaybeUninit::uninit()).collect();

        Self {
            buffer,
            write_pos: 0,
            count: 0,
            mask: cap - 1,
        }
    }

    /// Physical slot index of the `index`-th oldest live element.
    fn slot(&self, index: usize) -> usize {
        (self.write_pos + self.buffer.len() - self.count + index) & self.mask
    }

    /// Adds an element, overwriting (and dropping) the oldest element if full.
    pub fn push(&mut self, item: T) {
        let capacity = self.buffer.len();

        if self.count == capacity {
            // When full, the oldest element lives exactly in the slot we are
            // about to overwrite: (write_pos + cap - count) & mask == write_pos.
            // Drop it first so it is not leaked.
            // SAFETY: the buffer is full, so this slot is initialized.
            unsafe {
                std::ptr::drop_in_place(self.buffer[self.write_pos].as_mut_ptr());
            }
        }

        self.buffer[self.write_pos] = MaybeUninit::new(item);
        self.write_pos = (self.write_pos + 1) & self.mask;
        if self.count < capacity {
            self.count += 1;
        }
    }

    /// Returns a reference to the `index`-th oldest element, if it exists.
    pub fn get(&self, index: usize) -> Option<&T> {
        if index >= self.count {
            return None;
        }
        let slot = self.slot(index);
        // SAFETY: index < count, so this slot holds an initialized element.
        Some(unsafe { &*self.buffer[slot].as_ptr() })
    }

    /// Returns a mutable reference to the `index`-th oldest element, if it exists.
    pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
        if index >= self.count {
            return None;
        }
        let slot = self.slot(index);
        // SAFETY: index < count, so this slot holds an initialized element.
        Some(unsafe { &mut *self.buffer[slot].as_mut_ptr() })
    }

    /// Returns the number of elements currently in the buffer.
    pub fn len(&self) -> usize {
        self.count
    }

    /// Returns true if the buffer is empty.
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Returns true if the buffer is full.
    pub fn is_full(&self) -> bool {
        self.count == self.buffer.len()
    }

    /// Clears the buffer, dropping all live elements.
    pub fn clear(&mut self) {
        for i in 0..self.count {
            let slot = self.slot(i);
            // SAFETY: every slot for indices 0..count is initialized.
            unsafe {
                std::ptr::drop_in_place(self.buffer[slot].as_mut_ptr());
            }
        }
        self.write_pos = 0;
        self.count = 0;
    }

    /// Returns an iterator over the elements, oldest to newest.
    pub fn iter(&self) -> impl Iterator<Item = &T> {
        (0..self.count).filter_map(move |i| self.get(i))
    }

    /// Pushes every item from `items`, overwriting oldest elements as needed.
    pub fn extend(&mut self, items: impl IntoIterator<Item = T>) {
        for item in items {
            self.push(item);
        }
    }

    /// Creates a buffer of capacity `cap` pre-filled from `items`.
    pub fn new_extend(items: impl IntoIterator<Item = T>, cap: usize) -> Self {
        let mut buffer = Self::new(cap);
        buffer.extend(items);
        buffer
    }

    /// Returns the newest element, or `None` if the buffer is empty.
    ///
    /// BUG FIX: the original computed `self.get(self.count - 1)`, which
    /// underflows `usize` (panicking in debug builds) when the buffer is empty.
    pub fn last(&self) -> Option<&T> {
        self.count.checked_sub(1).and_then(|i| self.get(i))
    }

    /// Returns the fixed capacity of the buffer.
    pub fn capacity(&self) -> usize {
        self.buffer.len()
    }

    /// Merges two ring buffers into a new one, sorted ascending by `compare`.
    ///
    /// The result has the same capacity as `self` and keeps at most that many
    /// of the smallest elements (sort is stable; `self`'s elements come first
    /// among equals).
    pub fn merge_sorted<F>(&self, other: &RingBuffer<T>, compare: F) -> RingBuffer<T>
    where
        F: Fn(&T, &T) -> std::cmp::Ordering,
        T: Clone,
    {
        let cap = self.buffer.len();

        // Collect elements from both buffers, then order them.
        let mut all: Vec<T> = Vec::with_capacity(self.count + other.count);
        all.extend(self.iter().cloned());
        all.extend(other.iter().cloned());
        all.sort_by(compare);

        // Keep at most `cap` elements (take(n) on a short iterator is a no-op,
        // so no explicit guard is needed).
        let mut merged = RingBuffer::new(cap);
        merged.extend(all.into_iter().take(cap));
        merged
    }

    /// Merges two ring buffers using the natural ordering of `T`.
    ///
    /// `other` may hold a different element type `T2` convertible into `T`.
    /// The result has the same capacity as `self` and keeps at most that many
    /// of the smallest elements.
    pub fn merge<T2>(&self, other: &RingBuffer<T2>) -> RingBuffer<T>
    where
        T: Clone + Ord,
        T2: Clone + Into<T>,
    {
        let cap = self.buffer.len();

        let mut all: Vec<T> = Vec::with_capacity(self.count + other.count);
        all.extend(self.iter().cloned());
        all.extend(other.iter().cloned().map(Into::into));
        all.sort();

        let mut merged = RingBuffer::new(cap);
        merged.extend(all.into_iter().take(cap));
        merged
    }
}

impl<T: Clone> RingBuffer<T> {
    /// Collects the live elements, oldest to newest, into a `Vec`.
    pub fn to_vec(&self) -> Vec<T> {
        self.iter().cloned().collect()
    }
}

impl<T: Copy> RingBuffer<T> {
    /// Pushes every element of `items` (by copy).
    pub fn extend_from_slice(&mut self, items: &[T]) {
        for &item in items {
            self.push(item);
        }
    }
}

impl<T> Index<usize> for RingBuffer<T> {
    type Output = T;

    fn index(&self, index: usize) -> &Self::Output {
        self.get(index).expect("Index out of bounds")
    }
}

impl<T> IndexMut<usize> for RingBuffer<T> {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        self.get_mut(index).expect("Index out of bounds")
    }
}

impl<T> Default for RingBuffer<T> {
    fn default() -> Self {
        Self::new(16) // Default capacity of 16
    }
}

impl<T> Drop for RingBuffer<T> {
    fn drop(&mut self) {
        // Elements live in MaybeUninit slots, so they must be dropped manually.
        self.clear();
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_buffer_is_empty() {
        let buffer: RingBuffer<i32> = RingBuffer::new(8);
        assert!(buffer.is_empty());
        assert_eq!(buffer.len(), 0);
        assert!(!buffer.is_full());
    }

    #[test]
    #[should_panic(expected = "Buffer size must be a power of 2")]
    fn test_non_power_of_two_size() {
        let _: RingBuffer<i32> = RingBuffer::new(3);
    }

    #[test]
    fn test_push_and_get() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);

        assert_eq!(buffer.get(0), Some(&1));
        assert_eq!(buffer.get(1), Some(&2));
        assert_eq!(buffer.get(2), None);

        assert_eq!(buffer.len(), 2);
        assert!(!buffer.is_empty());
        assert!(!buffer.is_full());
    }

    #[test]
    fn test_overwrite_when_full() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);
        buffer.push(3);
        buffer.push(4);
        assert!(buffer.is_full());

        // This should overwrite the oldest element (1)
        buffer.push(5);

        assert_eq!(buffer.get(0), Some(&2));
        assert_eq!(buffer.get(1), Some(&3));
        assert_eq!(buffer.get(2), Some(&4));
        assert_eq!(buffer.get(3), Some(&5));
        assert_eq!(buffer.len(), 4);
    }

    #[test]
    fn test_index_operator() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);

        assert_eq!(buffer[0], 1);
        assert_eq!(buffer[1], 2);
    }

    #[test]
    #[should_panic(expected = "Index out of bounds")]
    fn test_index_out_of_bounds() {
        let buffer: RingBuffer<i32> = RingBuffer::new(4);
        let _ = buffer[0]; // This should panic
    }

    #[test]
    fn test_clear() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);
        buffer.clear();

        assert!(buffer.is_empty());
        assert_eq!(buffer.len(), 0);
        assert_eq!(buffer.get(0), None);
    }

    #[test]
    fn test_last_on_empty_buffer() {
        // Regression test for the `count - 1` underflow in `last()`.
        let buffer: RingBuffer<i32> = RingBuffer::new(4);
        assert_eq!(buffer.last(), None);
    }

    #[test]
    fn test_iter() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);
        buffer.push(3);

        let collected: Vec<i32> = buffer.iter().cloned().collect();
        assert_eq!(collected, vec![1, 2, 3]);
    }

    #[test]
    fn test_get_mut() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);

        if let Some(val) = buffer.get_mut(0) {
            *val = 10;
        }

        assert_eq!(buffer.get(0), Some(&10));
        assert_eq!(buffer.get(1), Some(&2));
    }

    #[test]
    fn test_wrap_around() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        // Fill the buffer
        for i in 1..=4 {
            buffer.push(i);
        }

        // Push more elements to test wrap-around
        buffer.push(5);
        buffer.push(6);

        // Check that the oldest elements were overwritten
        assert_eq!(buffer.get(0), Some(&3));
        assert_eq!(buffer.get(1), Some(&4));
        assert_eq!(buffer.get(2), Some(&5));
        assert_eq!(buffer.get(3), Some(&6));
    }

    #[test]
    fn test_bit_masking() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(8);

        // Fill the buffer and then some
        for i in 0..12 {
            buffer.push(i);
        }

        // The buffer should contain the last 8 elements (4-11)
        for i in 0..8 {
            assert_eq!(buffer.get(i), Some(&(i as i32 + 4)));
        }
    }

    #[test]
    fn test_merge_sorted() {
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i32> = RingBuffer::new(4);

        buffer1.push(1);
        buffer1.push(3);
        buffer1.push(5);

        buffer2.push(2);
        buffer2.push(4);
        buffer2.push(6);

        let merged = buffer1.merge_sorted(&buffer2, |a, b| a.cmp(b));

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&1));
        assert_eq!(merged.get(1), Some(&2));
        assert_eq!(merged.get(2), Some(&3));
        assert_eq!(merged.get(3), Some(&4));
    }

    #[test]
    fn test_merge() {
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i32> = RingBuffer::new(4);

        buffer1.push(5);
        buffer1.push(3);
        buffer1.push(1);

        buffer2.push(6);
        buffer2.push(4);
        buffer2.push(2);

        let merged = buffer1.merge(&buffer2);

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&1));
        assert_eq!(merged.get(1), Some(&2));
        assert_eq!(merged.get(2), Some(&3));
        assert_eq!(merged.get(3), Some(&4));
    }

    #[test]
    fn test_merge_with_conversion() {
        // Unlike the original copy of this test, use a genuinely different
        // element type so the `T2: Into<T>` path is actually exercised.
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i16> = RingBuffer::new(4);

        buffer1.push(5);
        buffer1.push(3);
        buffer1.push(1);

        buffer2.push(6);
        buffer2.push(4);
        buffer2.push(2);

        let merged = buffer1.merge(&buffer2);

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&1));
        assert_eq!(merged.get(1), Some(&2));
        assert_eq!(merged.get(2), Some(&3));
        assert_eq!(merged.get(3), Some(&4));
    }

    #[test]
    fn test_merge_sorted_custom_comparison() {
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i32> = RingBuffer::new(4);

        buffer1.push(1);
        buffer1.push(3);
        buffer1.push(5);

        buffer2.push(2);
        buffer2.push(4);
        buffer2.push(6);

        // Reverse order comparison
        let merged = buffer1.merge_sorted(&buffer2, |a, b| b.cmp(a));

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&6));
        assert_eq!(merged.get(1), Some(&5));
        assert_eq!(merged.get(2), Some(&4));
        assert_eq!(merged.get(3), Some(&3));
    }
}
-------------------------------------------------------------------------------- 1 | use dovahkiin::types::{Map, OwnedValue}; 2 | use neb::client::transaction::{Transaction, TxnError}; 3 | use neb::ram::types::Id; 4 | 5 | pub async fn set_map_by_key_id( 6 | txn: &Transaction, 7 | cell_id: Id, 8 | key_id: u64, 9 | value: OwnedValue, 10 | ) -> Result, TxnError> { 11 | match txn.read(cell_id).await? { 12 | Some(mut cell) => { 13 | if let &mut OwnedValue::Map(ref mut map) = &mut cell.data { 14 | map.insert_key_id(key_id, value); 15 | } else { 16 | return Ok(None); 17 | } 18 | txn.update(cell).await?; 19 | return Ok(Some(())); 20 | } 21 | None => Ok(None), 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tests/server.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | mod server; 2 | --------------------------------------------------------------------------------