├── .github └── workflows │ └── rust.yml ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── config ├── log4rs.yaml └── test_server.yaml ├── log └── requests.log ├── scripts └── repeatdly_model_test.sh ├── src ├── apps │ ├── hnsw │ │ ├── coordinator │ │ │ ├── mod.rs │ │ │ └── tests.rs │ │ ├── measurements.rs │ │ ├── mod.rs │ │ └── partition │ │ │ ├── index.rs │ │ │ ├── mod.rs │ │ │ ├── schema.rs │ │ │ ├── search.rs │ │ │ ├── service.rs │ │ │ ├── tests.rs │ │ │ ├── types.rs │ │ │ └── utils.rs │ └── mod.rs ├── config │ └── mod.rs ├── graph │ ├── edge │ │ ├── bilateral.rs │ │ ├── directed.rs │ │ ├── hyper.rs │ │ ├── macros.rs │ │ ├── mod.rs │ │ └── undirectd.rs │ ├── fields.rs │ ├── id_list.rs │ ├── local │ │ └── mod.rs │ ├── mod.rs │ ├── partitioner │ │ ├── mod.rs │ │ ├── vector.rs │ │ └── vector_test.rs │ └── vertex │ │ └── mod.rs ├── job │ ├── logger.rs │ ├── mod.rs │ └── service.rs ├── main.rs ├── query │ ├── mod.rs │ └── symbols │ │ ├── crud │ │ ├── cell.rs │ │ ├── edge.rs │ │ ├── mod.rs │ │ └── vertex.rs │ │ └── mod.rs ├── server │ ├── general.rs │ ├── mod.rs │ ├── schema │ │ ├── mod.rs │ │ └── sm.rs │ └── traversal.rs ├── tests │ ├── graph │ │ └── mod.rs │ └── mod.rs ├── traversal │ ├── bfs │ │ ├── coordinator.rs │ │ ├── engine.rs │ │ ├── mod.rs │ │ └── task.rs │ ├── mod.rs │ └── navigation │ │ ├── apps.rs │ │ ├── coordinator.rs │ │ ├── engine.rs │ │ ├── job.rs │ │ ├── mod.rs │ │ ├── server.rs │ │ ├── task.rs │ │ └── worker.rs └── utils │ ├── bloom_filter.rs │ ├── file.rs │ ├── mod.rs │ ├── ring_buffer.rs │ └── transaction.rs └── tests ├── server.rs └── test.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "develop" ] 6 | pull_request: 7 | branches: [ "develop" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Build 20 
| run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose -- --test-threads=1 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Leiningen template 3 | pom.xml 4 | pom.xml.asc 5 | *jar 6 | /lib/ 7 | /classes/ 8 | /target/ 9 | /checkouts/ 10 | .lein-deps-sum 11 | .lein-repl-history 12 | .lein-plugins/ 13 | .lein-failures 14 | .nrepl-port 15 | ### Java template 16 | *.class 17 | 18 | # Mobile Tools for Java (J2ME) 19 | .mtj.tmp/ 20 | 21 | # Package Files # 22 | *.jar 23 | *.war 24 | *.ear 25 | 26 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 27 | hs_err_pid* 28 | ### OSX template 29 | .DS_Store 30 | .AppleDouble 31 | .LSOverride 32 | 33 | # Icon must end with two \r 34 | Icon 35 | 36 | # Thumbnails 37 | ._* 38 | 39 | # Files that might appear in the root of a volume 40 | .DocumentRevisions-V100 41 | .fseventsd 42 | .Spotlight-V100 43 | .TemporaryItems 44 | .Trashes 45 | .VolumeIcon.icns 46 | 47 | # Directories potentially created on remote AFP share 48 | .AppleDB 49 | .AppleDesktop 50 | Network Trash Folder 51 | Temporary Items 52 | .apdisk 53 | ### JetBrains template 54 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio 55 | 56 | *.iml 57 | 58 | ## Directory-based project format: 59 | .idea/ 60 | # if you remove the above rule, at least ignore the following: 61 | 62 | # User-specific stuff: 63 | # .idea/workspace.xml 64 | # .idea/tasks.xml 65 | # .idea/dictionaries 66 | 67 | # Sensitive or high-churn files: 68 | # .idea/dataSources.ids 69 | # .idea/dataSources.xml 70 | # .idea/sqlDataSources.xml 71 | # .idea/dynamic.xml 72 | # .idea/uiDesigner.xml 73 | 74 | # Gradle: 75 | # .idea/gradle.xml 76 | # .idea/libraries 77 | 78 | # Mongo Explorer plugin: 79 | # 
.idea/mongoSettings.xml 80 | 81 | ## File-based project format: 82 | *.ipr 83 | *.iws 84 | 85 | ## Plugin-specific files: 86 | 87 | # IntelliJ 88 | /out/ 89 | 90 | # mpeltonen/sbt-idea plugin 91 | .idea_modules/ 92 | 93 | # JIRA plugin 94 | atlassian-ide-plugin.xml 95 | 96 | # Crashlytics plugin (for Android Studio and IntelliJ) 97 | com_crashlytics_export_strings.xml 98 | crashlytics.properties 99 | crashlytics-build.properties 100 | 101 | #Testing configurations 102 | /configures 103 | 104 | #Testing data 105 | /data 106 | /wikidata* 107 | /computation 108 | .dat 109 | 110 | 111 | ### Rust template 112 | # Generated by Cargo 113 | 114 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 115 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 116 | Cargo.lock 117 | 118 | # These are backup files generated by rustfmt 119 | **/*.rs.bk 120 | 121 | .idea -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | rust: 4 | - nightly 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "morpheus" 3 | version = "0.1.0" 4 | authors = ["Hao Shi "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | neb = { git = "https://github.com/ShisoftResearch/Nebuchadnezzar", branch = "develop" } 9 | bifrost = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 10 | bifrost_plugins = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 11 | bifrost_hasher = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 12 | bifrost_proc_macro = { git = "https://github.com/shisoft/bifrost", branch = "develop" } 13 | dovahkiin = { git = "https://github.com/ShisoftResearch/Dovahkiin.git", branch = "develop" 
} 14 | lightning-containers = { git = "ssh://git@192.168.10.134/shisoft-x/Lightning.git", branch = "develop" } 15 | lazy_static = "*" 16 | futures = "0.3" 17 | parking_lot = {version = "0.12", features = ["nightly"]} 18 | tokio = { version = "1.23.0", features = ["full"] } 19 | async-stream = "0.3" 20 | serde = "*" 21 | serde_derive = "*" 22 | log = "0.4" 23 | log4rs = "*" 24 | env_logger = "0.11" 25 | yaml-rust = "*" 26 | serde_yaml = "*" 27 | rand = "0.9" 28 | libc = "*" 29 | rayon = "1.10.0" 30 | num_cpus = "1.16.0" 31 | num-traits = "0.2.19" 32 | bincode = "1.3.3" 33 | async-std = "1" 34 | ahash = "0.8.11" 35 | rand_distr = "0.5.1" 36 | once_cell = "1.21.3" 37 | itertools = "0.14.0" 38 | ndarray = "0.16.1" 39 | -------------------------------------------------------------------------------- /config/log4rs.yaml: -------------------------------------------------------------------------------- 1 | refresh_rate: 30 seconds 2 | appenders: 3 | stdout: 4 | kind: console 5 | requests: 6 | kind: file 7 | path: "log/requests.log" 8 | encoder: 9 | pattern: "{d} - {m}{n}" 10 | root: 11 | level: Debug 12 | appenders: 13 | - stdout 14 | loggers: 15 | app::backend::db: 16 | level: info 17 | app::requests: 18 | level: info 19 | appenders: 20 | - requests 21 | additive: false -------------------------------------------------------------------------------- /config/test_server.yaml: -------------------------------------------------------------------------------- 1 | server_addr: 127.0.0.1:5400 2 | group_name: Morpheus 3 | meta_members: 4 | - 127.0.0.1:5400 5 | storage: 6 | chunk_count: 8 7 | memory_size: 8192 # 8GB 8 | backup_storage: null 9 | wal_storage: null 10 | services: 11 | - Cell 12 | - Transaction 13 | - RangedIndexer 14 | - Query 15 | index_enabled: true -------------------------------------------------------------------------------- /log/requests.log: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ShisoftResearch/Morpheus/9ea3151c9c138371b582777faa8f9c817e986d9d/log/requests.log -------------------------------------------------------------------------------- /scripts/repeatdly_model_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | while true 4 | do 5 | cargo test --color=always --package morpheus --bin morpheus relationship -- --nocapture 6 | done -------------------------------------------------------------------------------- /src/apps/hnsw/coordinator/tests.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::sync::Arc; 3 | use std::time::Instant; 4 | 5 | use bifrost::conshash::ConsistentHashing; 6 | use bifrost::rpc::cluster::client_by_server_id; 7 | use dovahkiin::data_map; 8 | use dovahkiin::types::{Id, Map, OwnedPrimArray, OwnedValue, Type}; 9 | use neb::client::AsyncClient; 10 | use neb::ram::cell::OwnedCell; 11 | use neb::ram::schema::{Field, Schema as NebSchema}; 12 | 13 | use crate::apps::hnsw::coordinator::{AsyncServiceClient, HNSWIndexService}; 14 | use crate::apps::hnsw::measurements::MetricEncoding; 15 | use crate::apps::hnsw::partition::schema::initialize_schemas; 16 | use crate::apps::hnsw::HNSWPartitionService; 17 | use crate::job::logger::JobLogger; 18 | use crate::server::MorpheusServer; 19 | use crate::tests::start_server; 20 | use crate::traversal::navigation::Distance; 21 | 22 | const CELL_SCHEMA_ID: u32 = hash_ident!("CELL"); 23 | const VECTOR: &str = "VECTOR"; 24 | const VECTOR_FIELD_ID: u64 = hash_ident!(VECTOR) as u64; 25 | 26 | /// Struct containing test data vector 27 | #[derive(Debug, Clone)] 28 | pub struct TestVector { 29 | pub cell_id: Id, // The ID of the associated cell 30 | pub vector: OwnedPrimArray, // Vector data 31 | } 32 | 33 | impl TestVector { 34 | pub fn new(cell_higher: u64, cell_lower: u64, vector: OwnedPrimArray) -> Self { 35 | Self { 
36 | cell_id: Id::new(cell_higher, cell_lower), 37 | vector, 38 | } 39 | } 40 | 41 | pub fn to_data_cell(&self) -> OwnedCell { 42 | let cell_id = self.cell_id; 43 | let map = data_map!(VECTOR: OwnedValue::PrimArray(self.vector.clone())); 44 | let cell = OwnedCell::new_with_id(CELL_SCHEMA_ID, &cell_id, OwnedValue::Map(map)); 45 | cell 46 | } 47 | } 48 | 49 | /// Test environment structure to hold all components needed for HNSW coordinator tests 50 | pub struct TestEnvironment { 51 | pub job_counter: RefCell, 52 | pub group_name: String, 53 | pub field_id: u64, 54 | pub schema_id: u32, 55 | pub server_port: u32, 56 | pub partition_id: u64, 57 | pub test_vectors: Vec, 58 | pub neb_client: Option>, 59 | pub conshash: Option>, 60 | pub job_logger: Option>, 61 | pub partition: Option>, 62 | pub coordinator: Option>, 63 | pub coordinator_client: Option>, 64 | pub morpheus: Option>, 65 | } 66 | 67 | impl TestEnvironment { 68 | /// Create a new test environment with test vectors 69 | pub fn new( 70 | server_port: u32, 71 | group_name: &str, 72 | partition_id: u64, 73 | field_id: u64, 74 | schema_id: u32, 75 | ) -> Self { 76 | let _ = env_logger::try_init(); 77 | Self { 78 | job_counter: RefCell::new(1), 79 | group_name: group_name.to_string(), 80 | server_port, 81 | partition_id, 82 | field_id, 83 | schema_id, 84 | test_vectors: Vec::new(), 85 | neb_client: None, 86 | conshash: None, 87 | job_logger: None, 88 | partition: None, 89 | coordinator: None, 90 | coordinator_client: None, 91 | morpheus: None, 92 | } 93 | } 94 | 95 | /// Add test vectors to the environment 96 | pub fn with_test_vectors(mut self, vectors: Vec>) -> Self { 97 | // Create test vectors with IDs 98 | let test_vectors = vectors 99 | .into_iter() 100 | .enumerate() 101 | .map(|(i, vector)| { 102 | let cell_higher = self.partition_id; // Most share the same partition ID 103 | let cell_lower = 1000 + i as u64; // Unique cell IDs 104 | TestVector::new(cell_higher, cell_lower, OwnedPrimArray::F32(vector)) 105 
| }) 106 | .collect(); 107 | self.test_vectors = test_vectors; 108 | self 109 | } 110 | 111 | /// Initialize the JobLogger 112 | pub fn with_job_logger(mut self) -> Self { 113 | self.job_logger = Some(Arc::new(JobLogger::with_capacity(16))); 114 | self 115 | } 116 | 117 | pub async fn initialize_server(mut self) -> Self { 118 | let server = start_server(self.server_port, &self.group_name) 119 | .await 120 | .unwrap(); 121 | let server_id = server.neb_server.server_id; 122 | self.morpheus = Some(server.clone()); 123 | self.conshash = Some(server.neb_server.consh.clone()); 124 | self.partition = Some(server.init_hnsw_index_partition_service().await.unwrap()); 125 | self.coordinator = Some(server.init_hnsw_index_service().await.unwrap()); 126 | self.coordinator_client = Some( 127 | client_by_server_id(&server.neb_server.consh, server_id) 128 | .await 129 | .unwrap(), 130 | ); 131 | self.neb_client = Some(server.neb_client.clone()); 132 | self 133 | } 134 | 135 | fn cell_schema(&self) -> NebSchema { 136 | NebSchema::new_with_id( 137 | CELL_SCHEMA_ID, 138 | "text_schema", 139 | None, 140 | Field::new_schema(vec![Field::new_unindexed_array(VECTOR, Type::F32)]), 141 | false, 142 | false, 143 | ) 144 | } 145 | 146 | pub async fn initialize_schemas(self) -> Result { 147 | let neb_client = self.neb_client.as_ref().unwrap(); 148 | let morph = self.morpheus.as_ref().unwrap(); 149 | neb_client 150 | .new_schema_with_id(self.cell_schema()) 151 | .await 152 | .map_err(|e| format!("Failed to create cell schema: {:?}", e))? 
153 | .map_err(|e| format!("Failed to create cell schema: {:?}", e))?; 154 | initialize_schemas(morph).await.unwrap(); 155 | Ok(self) 156 | } 157 | 158 | /// Initialize all cells 159 | pub async fn initialize_cells(self) -> Result { 160 | if self.neb_client.is_none() { 161 | return Err("NebClient not initialized".to_string()); 162 | } 163 | 164 | let neb_client = self.neb_client.as_ref().unwrap(); 165 | 166 | // Write data cells 167 | for test_vector in &self.test_vectors { 168 | let data_cell = test_vector.to_data_cell(); 169 | if let Err(e) = neb_client.write_cell(data_cell).await { 170 | return Err(format!("Failed to write data cell: {:?}", e)); 171 | } 172 | } 173 | Ok(self) 174 | } 175 | 176 | pub async fn create_index(&self) -> Result<(), String> { 177 | if self.morpheus.is_none() { 178 | return Err("Morpheus not initialized".to_string()); 179 | } 180 | let client = self.coordinator_client.as_ref().unwrap(); 181 | client 182 | .new_index("test_index".to_string(), CELL_SCHEMA_ID, VECTOR_FIELD_ID) 183 | .await 184 | .map_err(|e| format!("Failed to create index: {:?}", e))? 185 | .map_err(|e| format!("Failed to create index: {:?}", e))?; 186 | Ok(()) 187 | } 188 | 189 | /// Index cells using the coordinator 190 | pub async fn index_cells(&self) -> Result<(), String> { 191 | let ef_construction = 256; 192 | if self.morpheus.is_none() { 193 | return Err("Morpheus not initialized".to_string()); 194 | } 195 | let client = self.coordinator_client.as_ref().unwrap(); 196 | 197 | for test_vector in &self.test_vectors { 198 | client 199 | .new_cell( 200 | CELL_SCHEMA_ID, 201 | VECTOR_FIELD_ID, 202 | test_vector.cell_id, 203 | ef_construction, 204 | MetricEncoding::L2, 205 | ) 206 | .await 207 | .map_err(|e| format!("Failed to index cell: {:?}", e))? 
208 | .map_err(|e| format!("Failed to index cell: {:?}", e))?; 209 | } 210 | Ok(()) 211 | } 212 | 213 | /// Run a search query using the coordinator 214 | pub async fn top_k(&self, query: Vec, k: u32) -> Result, String> { 215 | let start = Instant::now(); 216 | if self.coordinator.is_none() { 217 | return Err("Coordinator not initialized".to_string()); 218 | } 219 | let client = self.coordinator_client.as_ref().unwrap(); 220 | let results = client 221 | .query_top_k( 222 | CELL_SCHEMA_ID, 223 | VECTOR_FIELD_ID, 224 | OwnedPrimArray::F32(query), 225 | k, 226 | 32, // ef 227 | 10, // max_iter 228 | MetricEncoding::L2, 229 | ) 230 | .await 231 | .map_err(|e| format!("Failed to search: {:?}", e))? 232 | .map_err(|e| format!("Failed to search: {:?}", e))?; 233 | let duration = start.elapsed(); 234 | println!("Search took {:?} microseconds", duration.as_micros()); 235 | Ok(results) 236 | } 237 | } 238 | 239 | // Simple helper for creating test vectors 240 | pub fn create_test_vectors(count: usize, dim: usize) -> Vec> { 241 | let mut vectors = Vec::with_capacity(count); 242 | for i in 0..count { 243 | let vector = (0..dim).map(|j| (i * dim + j) as Distance * 0.1).collect(); 244 | vectors.push(vector); 245 | } 246 | vectors 247 | } 248 | 249 | #[cfg(test)] 250 | mod tests { 251 | use super::*; 252 | 253 | #[tokio::test] 254 | async fn test_create_index() { 255 | let env = TestEnvironment::new( 256 | 5000, 257 | "test_create_index", 258 | 1, 259 | VECTOR_FIELD_ID, 260 | CELL_SCHEMA_ID, 261 | ) 262 | .with_job_logger() 263 | .initialize_server() 264 | .await 265 | .initialize_schemas() 266 | .await 267 | .unwrap(); 268 | env.create_index().await.unwrap(); 269 | } 270 | 271 | #[tokio::test] 272 | async fn test_index_one_cell() { 273 | let env = TestEnvironment::new( 274 | 5001, 275 | "test_index_one_cell", 276 | 1, 277 | VECTOR_FIELD_ID, 278 | CELL_SCHEMA_ID, 279 | ) 280 | .with_test_vectors(vec![vec![1.0, 2.0, 3.0]]) 281 | .with_job_logger() 282 | .initialize_server() 283 | 
.await 284 | .initialize_schemas() 285 | .await 286 | .unwrap() 287 | .initialize_cells() 288 | .await 289 | .unwrap(); 290 | env.create_index().await.unwrap(); 291 | env.index_cells().await.unwrap(); 292 | } 293 | 294 | #[tokio::test] 295 | async fn test_index_and_query_one_cell() { 296 | let env = TestEnvironment::new( 297 | 5002, 298 | "test_index_and_query_one_cell", 299 | 1, 300 | VECTOR_FIELD_ID, 301 | CELL_SCHEMA_ID, 302 | ) 303 | .with_test_vectors(vec![vec![1.0, 2.0, 3.0]]) 304 | .with_job_logger() 305 | .initialize_server() 306 | .await 307 | .initialize_schemas() 308 | .await 309 | .unwrap() 310 | .initialize_cells() 311 | .await 312 | .unwrap(); 313 | env.create_index().await.unwrap(); 314 | env.index_cells().await.unwrap(); 315 | 316 | let query = vec![1.0, 2.0, 3.0]; 317 | let results = env.top_k(query, 1).await.unwrap(); 318 | assert_eq!(results.len(), 1); 319 | assert_eq!(results[0].0.lower, 1000); // First cell 320 | assert_eq!(results[0].1, 0.0); // Exact match 321 | } 322 | 323 | #[tokio::test] 324 | async fn test_index_and_query_multiple_cells() { 325 | let env = TestEnvironment::new( 326 | 5003, 327 | "test_index_and_query_multiple_cells", 328 | 1, 329 | VECTOR_FIELD_ID, 330 | CELL_SCHEMA_ID, 331 | ) 332 | .with_test_vectors(vec![ 333 | vec![1.0, 2.0, 3.0], 334 | vec![4.0, 5.0, 6.0], 335 | vec![7.0, 8.0, 9.0], 336 | ]) 337 | .with_job_logger() 338 | .initialize_server() 339 | .await 340 | .initialize_schemas() 341 | .await 342 | .unwrap() 343 | .initialize_cells() 344 | .await 345 | .unwrap(); 346 | env.create_index().await.unwrap(); 347 | env.index_cells().await.unwrap(); 348 | 349 | let query = vec![4.0, 5.0, 6.0]; 350 | let results = env.top_k(query, 2).await.unwrap(); 351 | assert_eq!(results.len(), 2); 352 | assert_eq!(results[0].0.lower, 1001); // Second cell 353 | assert_eq!(results[0].1, 0.0); // Exact match 354 | assert!(results[1].0.lower == 1000 || results[1].0.lower == 1002); // First or third cell 355 | } 356 | 357 | #[tokio::test] 
358 | async fn test_index_and_query_many_cells() { 359 | // Create 50 test vectors 360 | let mut test_vectors = Vec::with_capacity(50); 361 | for i in 0..50 { 362 | test_vectors.push(vec![i as f32, (i + 1) as f32, (i + 2) as f32]); 363 | } 364 | 365 | let env = TestEnvironment::new( 366 | 5004, 367 | "test_index_and_query_many_cells", 368 | 1, 369 | VECTOR_FIELD_ID, 370 | CELL_SCHEMA_ID, 371 | ) 372 | .with_test_vectors(test_vectors) 373 | .with_job_logger() 374 | .initialize_server() 375 | .await 376 | .initialize_schemas() 377 | .await 378 | .unwrap() 379 | .initialize_cells() 380 | .await 381 | .unwrap(); 382 | env.create_index().await.unwrap(); 383 | env.index_cells().await.unwrap(); 384 | 385 | let query = vec![10.0, 11.0, 12.0]; 386 | let k = 5; 387 | let results = env.top_k(query, k).await.unwrap(); 388 | assert_eq!(results.len(), k as usize); 389 | assert_eq!(results[0].0.lower, 1010); // Tenth cell 390 | assert_eq!(results[0].1, 0.0); // Exact match 391 | assert!(results[1].0.lower == 1009 || results[1].0.lower == 1011); // Neighboring cells 392 | } 393 | 394 | #[tokio::test] 395 | async fn test_error_cases() { 396 | let env = 397 | TestEnvironment::new(5005, "test_error_cases", 1, VECTOR_FIELD_ID, CELL_SCHEMA_ID) 398 | .with_job_logger() 399 | .initialize_server() 400 | .await 401 | .initialize_schemas() 402 | .await 403 | .unwrap(); 404 | 405 | // Test querying before creating index 406 | let query = vec![1.0, 2.0, 3.0]; 407 | let result = env.top_k(query, 1).await; 408 | assert!(result.is_err()); 409 | 410 | // Test querying with invalid schema 411 | env.create_index().await.unwrap(); 412 | let result = env 413 | .coordinator_client 414 | .as_ref() 415 | .unwrap() 416 | .query_top_k( 417 | 999, // Invalid schema 418 | VECTOR_FIELD_ID, 419 | OwnedPrimArray::F32(vec![1.0, 2.0, 3.0]), 420 | 1, 421 | 32, 422 | 10, 423 | MetricEncoding::L2, 424 | ) 425 | .await; 426 | match result { 427 | Ok(Ok(_)) => panic!("Expected error"), 428 | Ok(Err(e)) => { 429 | 
println!("Should be schema error: {:?}", e); 430 | } 431 | Err(e) => panic!("RPC Error: {:?}", e), 432 | } 433 | } 434 | } 435 | -------------------------------------------------------------------------------- /src/apps/hnsw/measurements.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::OwnedPrimArray; 2 | use ndarray::ArrayView1; 3 | use num_traits::{FromPrimitive, Num, One, Zero}; 4 | use std::fmt::Debug; 5 | 6 | use crate::traversal::navigation::Distance; 7 | 8 | pub use neb::index::vector::MetricEncoding; 9 | 10 | /// A trait for computing the distance between two vectors of generic numbers. 11 | /// The type parameter N represents the numeric type. 12 | pub trait HnswMetric: Clone + Copy { 13 | fn distance(&self, a: &[N], b: &[N]) -> N 14 | where 15 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug + 'static; 16 | } 17 | 18 | /// L2Metric computes the Euclidean distance (sum of squared differences). 19 | #[derive(Debug, Clone, Copy)] 20 | pub struct L2Metric; 21 | 22 | impl HnswMetric for L2Metric { 23 | fn distance(&self, a: &[N], b: &[N]) -> N 24 | where 25 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug, 26 | { 27 | // Create ndarray views 28 | let a_arr = ArrayView1::from(a); 29 | let b_arr = ArrayView1::from(b); 30 | // Compute (a - b) elementwise, square each difference and sum them. 31 | (&a_arr - &b_arr).mapv(|x| x * x).sum() 32 | } 33 | } 34 | 35 | /// CosineMetric computes cosine distance as 1 - (dot(a, b) / (||a|| * ||b||)). 36 | /// Best used with floating-point types. 37 | #[derive(Debug, Clone, Copy)] 38 | pub struct CosineMetric; 39 | 40 | impl HnswMetric for CosineMetric { 41 | fn distance(&self, a: &[N], b: &[N]) -> N 42 | where 43 | // For cosine, the implementation assumes the ability to convert from f64. 
44 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug + 'static, 45 | { 46 | let one = N::one(); 47 | let a_arr = ArrayView1::from(a); 48 | let b_arr = ArrayView1::from(b); 49 | // Compute the dot product using ndarray's dot method. 50 | let dot = a_arr.dot(&b_arr); 51 | // For the norms, compute the squared sum and then “raise” it to the 0.5 power. 52 | let exponent = N::from_f64(0.5).unwrap(); 53 | let a_norm_squared: N = a_arr.mapv(|x| x * x).sum(); 54 | let b_norm_squared: N = b_arr.mapv(|x| x * x).sum(); 55 | let a_norm = a_norm_squared.pow(exponent); 56 | let b_norm = b_norm_squared.pow(exponent); 57 | if a_norm == N::zero() || b_norm == N::zero() { 58 | return one; 59 | } 60 | one - (dot / (a_norm * b_norm)) 61 | } 62 | } 63 | 64 | /// ManhattanMetric computes the Manhattan distance (sum of absolute differences). 65 | #[derive(Debug, Clone, Copy)] 66 | pub struct ManhattanMetric; 67 | 68 | impl HnswMetric for ManhattanMetric { 69 | fn distance(&self, a: &[N], b: &[N]) -> N 70 | where 71 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug, 72 | { 73 | let a_arr = ArrayView1::from(a); 74 | let b_arr = ArrayView1::from(b); 75 | // Zip the arrays and sum the absolute differences. 76 | a_arr 77 | .iter() 78 | .zip(b_arr.iter()) 79 | .fold(N::zero(), |acc, (&x, &y)| { 80 | let diff = if x < y { y - x } else { x - y }; 81 | acc + diff 82 | }) 83 | } 84 | } 85 | 86 | /// ChebyshevMetric computes the Chebyshev distance (maximum absolute difference). 
87 | #[derive(Debug, Clone, Copy)] 88 | pub struct ChebyshevMetric; 89 | 90 | impl HnswMetric for ChebyshevMetric { 91 | fn distance(&self, a: &[N], b: &[N]) -> N 92 | where 93 | N: Num + FromPrimitive + Zero + Pow + PartialOrd + Copy + Debug, 94 | { 95 | let a_arr = ArrayView1::from(a); 96 | let b_arr = ArrayView1::from(b); 97 | a_arr 98 | .iter() 99 | .zip(b_arr.iter()) 100 | .fold(N::zero(), |max, (&x, &y)| { 101 | let diff = if x < y { y - x } else { x - y }; 102 | if diff > max { 103 | diff 104 | } else { 105 | max 106 | } 107 | }) 108 | } 109 | } 110 | 111 | /// An enum to encapsulate the various metric types. 112 | #[derive(Debug, Clone, Copy)] 113 | pub enum Metric { 114 | L2(L2Metric), 115 | Cosine(CosineMetric), 116 | Manhattan(ManhattanMetric), 117 | Chebyshev(ChebyshevMetric), 118 | } 119 | 120 | impl Metric { 121 | pub fn to_encoding(self) -> MetricEncoding { 122 | match self { 123 | Metric::L2(_metric) => MetricEncoding::L2, 124 | Metric::Cosine(_metric) => MetricEncoding::Cosine, 125 | Metric::Manhattan(_metric) => MetricEncoding::Manhattan, 126 | Metric::Chebyshev(_metric) => MetricEncoding::Chebyshev, 127 | } 128 | } 129 | 130 | pub fn from_encoding(metric: MetricEncoding) -> Self { 131 | match metric { 132 | MetricEncoding::L2 => Metric::L2(L2Metric), 133 | MetricEncoding::Cosine => Metric::Cosine(CosineMetric), 134 | MetricEncoding::Manhattan => Metric::Manhattan(ManhattanMetric), 135 | MetricEncoding::Chebyshev => Metric::Chebyshev(ChebyshevMetric), 136 | } 137 | } 138 | } 139 | 140 | /// A trait to support exponentiation. For floats we use powf and for integers their native pow. 
/// Exponentiation support for the numeric element types used by the metrics.
///
/// Floating-point types delegate to `powf`; integer types delegate to their
/// inherent `pow`, casting non-`u32` exponents to `u32` first. The `RHS`
/// parameter defaults to `Self`, so `Pow for T` means `T.pow(T)`.
pub trait Pow<RHS = Self> {
    type Output;
    fn pow(self, rhs: RHS) -> Self::Output;
}

impl Pow for f32 {
    type Output = f32;
    fn pow(self, rhs: f32) -> f32 {
        self.powf(rhs)
    }
}

impl Pow for f64 {
    type Output = f64;
    fn pow(self, rhs: f64) -> f64 {
        self.powf(rhs)
    }
}

impl Pow for u32 {
    type Output = u32;
    fn pow(self, rhs: u32) -> u32 {
        // Fully-qualified so this unambiguously hits the inherent `u32::pow`,
        // never this trait method.
        u32::pow(self, rhs)
    }
}

impl Pow<u32> for i32 {
    type Output = i32;
    fn pow(self, rhs: u32) -> i32 {
        i32::pow(self, rhs)
    }
}

impl Pow<u32> for u64 {
    type Output = u64;
    fn pow(self, rhs: u32) -> u64 {
        u64::pow(self, rhs)
    }
}

impl Pow for u8 {
    type Output = u8;
    fn pow(self, rhs: u8) -> u8 {
        u8::pow(self, rhs as u32)
    }
}

impl Pow for u16 {
    type Output = u16;
    fn pow(self, rhs: u16) -> u16 {
        u16::pow(self, rhs as u32)
    }
}

impl Pow for i8 {
    type Output = i8;
    fn pow(self, rhs: i8) -> i8 {
        // NOTE(review): a negative exponent wraps through the `as u32` cast;
        // callers are expected to pass non-negative exponents.
        i8::pow(self, rhs as u32)
    }
}

impl Pow for i16 {
    type Output = i16;
    fn pow(self, rhs: i16) -> i16 {
        i16::pow(self, rhs as u32)
    }
}

impl Pow for i32 {
    type Output = i32;
    fn pow(self, rhs: i32) -> i32 {
        i32::pow(self, rhs as u32)
    }
}

impl Pow for i64 {
    type Output = i64;
    fn pow(self, rhs: i64) -> i64 {
        i64::pow(self, rhs as u32)
    }
}

/// Errors produced when computing a distance between two stored vectors.
#[derive(Debug, Clone, Copy)]
pub enum DistanceError {
    // The two vectors have different element types.
    ValueTypeMismatch,
    // The element type has no distance implementation.
    ArrayTypeNotSupported,
}
metric.distance(va.as_slice(), vb); 241 | return Some(m as Distance); 242 | } 243 | (OwnedPrimArray::F64(va), OwnedPrimArray::F64(vb)) => { 244 | let m = metric.distance(va.as_slice(), vb); 245 | return Some(m as Distance); 246 | } 247 | // // int 248 | (OwnedPrimArray::I8(va), OwnedPrimArray::I8(vb)) => { 249 | let m = metric.distance(va.as_slice(), vb); 250 | return Some(m as Distance); 251 | } 252 | (OwnedPrimArray::I16(va), OwnedPrimArray::I16(vb)) => { 253 | let m = metric.distance(va.as_slice(), vb); 254 | return Some(m as Distance); 255 | } 256 | (OwnedPrimArray::I32(va), OwnedPrimArray::I32(vb)) => { 257 | let m = metric.distance(va.as_slice(), vb); 258 | return Some(m as Distance); 259 | } 260 | (OwnedPrimArray::I64(va), OwnedPrimArray::I64(vb)) => { 261 | let m = metric.distance(va.as_slice(), vb); 262 | return Some(m as Distance); 263 | } 264 | // uint 265 | (OwnedPrimArray::U8(va), OwnedPrimArray::U8(vb)) => { 266 | let m = metric.distance(va.as_slice(), vb); 267 | return Some(m as Distance); 268 | } 269 | (OwnedPrimArray::U16(va), OwnedPrimArray::U16(vb)) => { 270 | let m = metric.distance(va.as_slice(), vb); 271 | return Some(m as Distance); 272 | } 273 | (OwnedPrimArray::U32(va), OwnedPrimArray::U32(vb)) => { 274 | let m = metric.distance(va.as_slice(), vb); 275 | return Some(m as Distance); 276 | } 277 | _ => None, 278 | } 279 | } 280 | 281 | #[cfg(test)] 282 | mod tests { 283 | use super::*; 284 | use std::f64::consts::PI; 285 | 286 | #[test] 287 | fn test_l2_metric() { 288 | let metric = L2Metric; 289 | 290 | // Test with simple vectors 291 | let a = vec![1.0, 2.0, 3.0]; 292 | let b = vec![4.0, 5.0, 6.0]; 293 | let distance = metric.distance(&a, &b); 294 | assert_eq!(distance, 27.0); // (3^2 + 3^2 + 3^2) = 27 295 | 296 | // Test with zero distance 297 | let c = vec![1.0, 2.0, 3.0]; 298 | let distance = metric.distance(&a, &c); 299 | assert_eq!(distance, 0.0); 300 | 301 | // Test with negative values 302 | let d = vec![-1.0, -2.0, -3.0]; 303 | let 
distance = metric.distance(&a, &d); 304 | assert_eq!(distance, 56.0); // (2^2 + 4^2 + 6^2) = 56 305 | } 306 | 307 | #[test] 308 | fn test_cosine_metric() { 309 | let metric = CosineMetric; 310 | 311 | // Test with orthogonal vectors (should be 1.0 - 0.0 = 1.0) 312 | let a = vec![1.0f64, 0.0]; 313 | let b = vec![0.0, 1.0]; 314 | let distance = metric.distance(&a, &b); 315 | assert!((distance - 1.0).abs() < 1e-10); 316 | 317 | // Test with parallel vectors (should be 1.0 - 1.0 = 0.0) 318 | let c = vec![2.0f64, 4.0]; 319 | let d = vec![1.0, 2.0]; 320 | let distance = metric.distance(&c, &d); 321 | assert!(distance.abs() < 1e-10); 322 | } 323 | 324 | #[test] 325 | fn test_manhattan_metric() { 326 | let metric = ManhattanMetric; 327 | 328 | // Test with simple vectors 329 | let a = vec![1.0, 2.0, 3.0]; 330 | let b = vec![4.0, 5.0, 6.0]; 331 | let distance = metric.distance(&a, &b); 332 | assert_eq!(distance, 9.0); // |4-1| + |5-2| + |6-3| = 3 + 3 + 3 = 9 333 | 334 | // Test with zero distance 335 | let c = vec![1.0, 2.0, 3.0]; 336 | let distance = metric.distance(&a, &c); 337 | assert_eq!(distance, 0.0); 338 | 339 | // Test with negative values 340 | let d = vec![-1.0, -2.0, -3.0]; 341 | let distance = metric.distance(&a, &d); 342 | assert_eq!(distance, 12.0); // |1-(-1)| + |2-(-2)| + |3-(-3)| = 2 + 4 + 6 = 12 343 | } 344 | 345 | #[test] 346 | fn test_chebyshev_metric() { 347 | let metric = ChebyshevMetric; 348 | 349 | // Test with simple vectors 350 | let a = vec![1.0, 2.0, 3.0]; 351 | let b = vec![4.0, 5.0, 6.0]; 352 | let distance = metric.distance(&a, &b); 353 | assert_eq!(distance, 3.0); // max(|4-1|, |5-2|, |6-3|) = max(3, 3, 3) = 3 354 | 355 | // Test with zero distance 356 | let c = vec![1.0, 2.0, 3.0]; 357 | let distance = metric.distance(&a, &c); 358 | assert_eq!(distance, 0.0); 359 | 360 | // Test with negative values 361 | let d = vec![-1.0, -2.0, -3.0]; 362 | let distance = metric.distance(&a, &d); 363 | assert_eq!(distance, 6.0); // max(|1-(-1)|, |2-(-2)|, 
|3-(-3)|) = max(2, 4, 6) = 6 364 | } 365 | 366 | #[test] 367 | fn test_metric_encoding() { 368 | // Test conversion from Metric to MetricEncoding and back 369 | let l2 = Metric::L2(L2Metric); 370 | let encoding = Metric::to_encoding(l2); 371 | assert!(matches!(encoding, MetricEncoding::L2)); 372 | let metric = Metric::from_encoding(encoding); 373 | assert!(matches!(metric, Metric::L2(_))); 374 | 375 | let cosine = Metric::Cosine(CosineMetric); 376 | let encoding = cosine.to_encoding(); 377 | assert!(matches!(encoding, MetricEncoding::Cosine)); 378 | let metric = Metric::from_encoding(encoding); 379 | assert!(matches!(metric, Metric::Cosine(_))); 380 | 381 | let manhattan = Metric::Manhattan(ManhattanMetric); 382 | let encoding = manhattan.to_encoding(); 383 | assert!(matches!(encoding, MetricEncoding::Manhattan)); 384 | let metric = Metric::from_encoding(encoding); 385 | assert!(matches!(metric, Metric::Manhattan(_))); 386 | 387 | let chebyshev = Metric::Chebyshev(ChebyshevMetric); 388 | let encoding = chebyshev.to_encoding(); 389 | assert!(matches!(encoding, MetricEncoding::Chebyshev)); 390 | let metric = Metric::from_encoding(encoding); 391 | assert!(matches!(metric, Metric::Chebyshev(_))); 392 | } 393 | } 394 | -------------------------------------------------------------------------------- /src/apps/hnsw/mod.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use bifrost::rpc::cluster::client_by_server_id; 4 | use bifrost_hasher::hash_str; 5 | use dovahkiin::types::Id; 6 | use futures::{ 7 | future::{BoxFuture, FutureExt}, 8 | TryFutureExt, 9 | }; 10 | use neb::{ 11 | index::{ 12 | builder::IndexError, 13 | vector::{set_vector_index_core, VectorIndexerCore}, 14 | }, 15 | 16 | server::NebServer, 17 | }; 18 | 19 | 20 | use crate::{ 21 | apps::hnsw::coordinator::AsyncServiceClient, 22 | server::MorpheusServer, 23 | }; 24 | 25 | 26 | 27 | pub mod coordinator; 28 | pub mod measurements; 29 | pub mod 
partition; 30 | 31 | use partition::schema::*; 32 | 33 | // Re-export 34 | pub use partition::HNSWPartitionService; 35 | 36 | pub const DEFAULT_EF_CONSTRUCTION: u64 = 256; 37 | 38 | pub async fn initial_app(morph: &Arc) -> Result<(), String> { 39 | initialize_schemas(morph).await?; 40 | return Ok(()); 41 | } 42 | 43 | pub fn meta_index_id(schema: u32, field_id: u64) -> Id { 44 | let lo_str = format!("HNSW_IDX-{}", schema); 45 | let hi_str = format!("HNSW_IDX-{}", field_id); 46 | let lo_hash = hash_str(&lo_str); 47 | let hi_hash = hash_str(&hi_str); 48 | Id::new(lo_hash, hi_hash) 49 | } 50 | 51 | pub struct VectorIndexer { 52 | ef_construction: u64, 53 | coordinator: Arc, 54 | } 55 | 56 | impl VectorIndexerCore for VectorIndexer { 57 | fn insert( 58 | &self, 59 | cell_id: &Id, 60 | schema_id: u32, 61 | field_id: u64, 62 | metric_encoding: neb::index::vector::MetricEncoding, 63 | ) -> BoxFuture> { 64 | self.coordinator 65 | .new_cell( 66 | schema_id, 67 | field_id, 68 | *cell_id, 69 | self.ef_construction, 70 | metric_encoding, 71 | ) 72 | .map_err(IndexError::RPCError) 73 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 74 | .boxed() 75 | } 76 | 77 | fn remove( 78 | &self, 79 | cell_id: &Id, 80 | _schema_id: u32, 81 | _field_id: u64, 82 | ) -> BoxFuture> { 83 | self.coordinator 84 | .del_cell(*cell_id) 85 | .map_err(IndexError::RPCError) 86 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 87 | .boxed() 88 | } 89 | 90 | fn new_index(&self, schema_id: u32, field_id: u64) -> BoxFuture> { 91 | let index_name = format!("HNSW-{}-{}", schema_id, field_id); 92 | self.coordinator 93 | .new_index(index_name, schema_id, field_id) 94 | .map_err(IndexError::RPCError) 95 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 96 | .boxed() 97 | } 98 | 99 | fn delete_index( 100 | &self, 101 | schema_id: u32, 102 | field_id: u64, 103 | ) -> BoxFuture> { 104 | self.coordinator 105 | .delete_index(schema_id, field_id) 106 | .map_err(IndexError::RPCError) 
107 | .and_then(|r| async move { r.map_err(IndexError::Other) }) 108 | .boxed() 109 | } 110 | } 111 | 112 | impl VectorIndexer { 113 | pub async fn new(neb_server: &Arc) -> Self { 114 | let server_id = neb_server.server_id; 115 | let conshash = &neb_server.consh; 116 | let coordinator_client = client_by_server_id(conshash, server_id).await.unwrap(); 117 | Self { 118 | coordinator: coordinator_client, 119 | ef_construction: DEFAULT_EF_CONSTRUCTION, 120 | } 121 | } 122 | 123 | pub fn set_core(self) { 124 | set_vector_index_core(self); 125 | } 126 | 127 | pub async fn new_and_set_core(neb_server: &Arc) { 128 | let indexer = Self::new(neb_server).await; 129 | indexer.set_core(); 130 | } 131 | 132 | pub fn set_ef_construction(&mut self, ef_construction: u64) { 133 | self.ef_construction = ef_construction; 134 | } 135 | } 136 | 137 | unsafe impl Send for VectorIndexer {} 138 | unsafe impl Sync for VectorIndexer {} 139 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/index.rs: -------------------------------------------------------------------------------- 1 | use bifrost::conshash::ConsistentHashing; 2 | use dovahkiin::{ 3 | data_map, 4 | types::{Id, OwnedPrimArray, OwnedValue}, 5 | }; 6 | use neb::ram::cell::OwnedCell; 7 | use std::{collections::BTreeMap, sync::Arc}; 8 | 9 | use super::{schema::*, HNSWIndexError}; 10 | use crate::{ 11 | apps::hnsw::measurements::HnswMetric, 12 | graph::GraphEngine, 13 | job::{ 14 | logger::{append_job_log, JobLogLevel, JobLogger}, 15 | JobId, 16 | }, 17 | }; 18 | use dovahkiin::types::Map; 19 | pub struct HNSWIndex { 20 | pub id: Id, 21 | pub name: String, 22 | pub max_level: u32, 23 | pub schema: u32, 24 | pub field: u64, 25 | // Partition local top level vertices 26 | // Key is the LSH gray code and value is the vertex id 27 | pub top_level_vertices: BTreeMap, 28 | } 29 | 30 | impl HNSWIndex { 31 | pub fn new(id: Id, name: String, schema: u32, field: u64) -> Self { 32 | Self { 
33 | id, 34 | name, 35 | max_level: 0, 36 | schema, 37 | field, 38 | top_level_vertices: BTreeMap::new(), 39 | } 40 | } 41 | pub fn to_cell(&self) -> OwnedCell { 42 | let top_level = self 43 | .top_level_vertices 44 | .values() 45 | .cloned() 46 | .collect::>(); 47 | let top_level_array = OwnedPrimArray::Id(top_level); 48 | let map = data_map! { 49 | NAME: self.name.clone(), 50 | NUM_LEVELS: self.max_level, 51 | SCHEMA: self.schema, 52 | FIELD: self.field, 53 | TOP_LEVEL_VERTICES: OwnedValue::PrimArray(top_level_array) 54 | }; 55 | OwnedCell::new_with_id(INDEX_SCHEMA_ID, &self.id, OwnedValue::Map(map)) 56 | } 57 | 58 | // Helper that converts this index's metric encoding into an HnswMetric implementation (currently always returns L2) 59 | pub fn metric_to_hnsw_metric(&self) -> impl HnswMetric { 60 | // Default to L2 metric if not specified 61 | super::super::measurements::L2Metric 62 | } 63 | 64 | pub async fn save( 65 | &self, 66 | engine: &Arc, 67 | server_id: u64, 68 | conshash: &Arc, 69 | job_logger: &Arc, 70 | job_id: JobId, 71 | ) -> Result<(), HNSWIndexError> { 72 | let cell_id = self.id; 73 | let txn_res = engine 74 | .neb_client() 75 | .transaction(|txn| { 76 | let cell_id = cell_id; 77 | let server_id = server_id; 78 | let conshash = conshash.clone(); 79 | let top_level_vertices = self 80 | .top_level_vertices 81 | .values() 82 | .cloned() 83 | .collect::>(); 84 | debug_assert!(!cell_id.is_unit_id()); 85 | async move { 86 | let mut cell = match txn.read(cell_id).await { 87 | Ok(Some(cell)) => cell, 88 | Ok(None) => { 89 | append_job_log( 90 | job_logger, 91 | job_id, 92 | JobLogLevel::Error, 93 | format!("Index not found: {:?}", cell_id), 94 | ); 95 | return Ok(Err(HNSWIndexError::IndexNotFound)); 96 | } 97 | Err(e) => return Ok(Err(HNSWIndexError::TxnError(e))), 98 | }; 99 | let top_level = match cell[NUM_LEVELS].u32() { 100 | Some(top_level) => top_level, 101 | None => { 102 | return Ok(Err(HNSWIndexError::IndexFormatError(format!( 103 | "Top level not found: {:?}", 104 |
cell[NUM_LEVELS] 105 | )))) 106 | } 107 | }; 108 | let mut new_vertices = Vec::new(); 109 | if *top_level == self.max_level { 110 | // Need to preserve the existing top level vertices 111 | let vertices = match cell[TOP_LEVEL_VERTICES].prim_array() { 112 | Some(OwnedPrimArray::Id(vertices)) => vertices, 113 | Some(_) => { 114 | return Ok(Err(HNSWIndexError::IndexFormatError(format!( 115 | "Top level vertices is not a list of ids: {:?}", 116 | cell[TOP_LEVEL_VERTICES] 117 | )))) 118 | } 119 | None => { 120 | return Ok(Err(HNSWIndexError::IndexFormatError( 121 | "Top level vertices not found".to_string(), 122 | ))) 123 | } 124 | }; 125 | 126 | new_vertices = vertices 127 | .iter() 128 | .filter_map(|id| { 129 | let id_server_id = conshash.get_server_id(id.higher); 130 | if id_server_id == Some(server_id) { 131 | None 132 | } else { 133 | Some(*id) 134 | } 135 | }) 136 | .collect::>(); 137 | } 138 | new_vertices.extend(top_level_vertices); 139 | cell[TOP_LEVEL_VERTICES] = 140 | OwnedValue::PrimArray(OwnedPrimArray::Id(new_vertices)); 141 | cell[NUM_LEVELS] = OwnedValue::U32(self.max_level); 142 | match txn.update(cell).await { 143 | Ok(_) => Ok(Ok(())), 144 | Err(e) => Ok(Err(HNSWIndexError::TxnError(e))), 145 | } 146 | } 147 | }) 148 | .await; 149 | match txn_res { 150 | Ok(Ok(_)) => Ok(()), 151 | Ok(Err(e)) => Err(e), 152 | Err(e) => Err(HNSWIndexError::TxnError(e)), 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod index; 2 | pub mod schema; 3 | pub mod search; 4 | pub mod service; 5 | #[cfg(test)] 6 | pub mod tests; 7 | pub mod types; 8 | pub mod utils; 9 | 10 | pub use service::HNSWPartitionService; 11 | pub use types::{HNSWIndexError, SearchMetadata}; 12 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/schema.rs: 
-------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use bifrost_hasher::hash_str; 4 | use dovahkiin::data_map; 5 | use dovahkiin::types::{Map, OwnedValue, Type}; 6 | use neb::ram::cell::{Cell, OwnedCell}; 7 | use neb::ram::schema::{Field, IndexType, Schema as NebSchema}; 8 | 9 | use crate::graph::edge; 10 | use crate::graph::vertex::Vertex; 11 | use crate::server::{schema::*, MorpheusServer}; 12 | 13 | use super::index::HNSWIndex; 14 | use super::search::MAX_LEVEL_CAP; 15 | 16 | pub const NAME: &str = "NAME"; 17 | pub const ENTRY: &str = "ENTRY"; 18 | pub const NUM_LEVELS: &str = "NUM_LEVELS"; 19 | pub const CELL: &str = "CELL"; 20 | pub const SCHEMA: &str = "SCHEMA"; 21 | pub const FIELD: &str = "FIELD"; 22 | pub const LSH: &str = "LSH"; 23 | pub const TOP_LEVEL_VERTICES: &str = "TOP_LEVEL_VERTICES"; 24 | pub const INDEX_SCHEMA_NAME: &str = "_hnsw-index"; 25 | 26 | pub const INDEX_SCHEMA_ID: u32 = hash_ident!(INDEX_SCHEMA_NAME) as u32; 27 | pub const CELL_FIELD_ID: u64 = hash_ident!(CELL) as u64; 28 | 29 | fn group_index_name() -> String { 30 | INDEX_SCHEMA_NAME.to_string() 31 | } 32 | 33 | pub fn group_index_schema_id() -> u32 { 34 | INDEX_SCHEMA_ID 35 | } 36 | 37 | fn edge_schema_name(name: &str, level: usize) -> String { 38 | format!("_hnsw-edge-{}@{}", name, level) 39 | } 40 | 41 | // One cell for each (schema, field) pair 42 | pub fn index_schema() -> NebSchema { 43 | NebSchema::new_with_id( 44 | INDEX_SCHEMA_ID, 45 | &group_index_name(), 46 | None, 47 | Field::new_schema(vec![ 48 | Field::new_unindexed(NAME, Type::String), 49 | Field::new_unindexed(NUM_LEVELS, Type::U32), 50 | Field::new_unindexed(SCHEMA, Type::U32), 51 | Field::new_unindexed(FIELD, Type::U64), 52 | Field::new_unindexed_array(TOP_LEVEL_VERTICES, Type::Id), 53 | ]), 54 | false, 55 | true, // Enable enumeration 56 | ) 57 | } 58 | 59 | pub fn index_cell(index: &HNSWIndex) -> OwnedCell { 60 | let id = index.id; 61 | 
OwnedCell::new_with_id( 62 | INDEX_SCHEMA_ID, 63 | &id, 64 | OwnedValue::Map(data_map!( 65 | NAME: index.name.clone(), 66 | NUM_LEVELS: index.max_level as u32, 67 | SCHEMA: index.schema as u32, 68 | FIELD: index.field, 69 | TOP_LEVEL_VERTICES: index.top_level_vertices.values().cloned().collect::>() 70 | )), 71 | ) 72 | } 73 | 74 | fn edge_schema(name: &str, level: usize) -> MorpheusSchema { 75 | let edge_schema_name = edge_schema_name(name, level); 76 | let edge_schema_id = hash_str(&edge_schema_name) as u32; 77 | MorpheusSchema::new_edge_with_id( 78 | edge_schema_id, 79 | &edge_schema_name, 80 | None, 81 | &EMPTY_FIELDS, 82 | edge::EdgeAttributes::new(edge::EdgeType::Undirected, false), 83 | false, 84 | ) 85 | } 86 | 87 | pub const HNSW_VERTEX_SCHEMA_ID: u32 = hash_ident!("_hnsw_vertex"); 88 | 89 | pub fn hnsw_vertex_schema() -> MorpheusSchema { 90 | MorpheusSchema::new_vertex_with_id( 91 | HNSW_VERTEX_SCHEMA_ID, 92 | "_hnsw_vertex", 93 | None, 94 | &vec![Field::new_indexed(CELL, Type::Id, vec![IndexType::Hashed])], 95 | false, 96 | ) 97 | } 98 | 99 | pub fn cell_vertex(cell: &C) -> Vertex { 100 | Vertex::new(HNSW_VERTEX_SCHEMA_ID, data_map!(CELL: cell.id())) 101 | } 102 | 103 | const MAX_LEVEL: usize = MAX_LEVEL_CAP; 104 | const DEFAULT_LEVEL_NAME: &str = "DEFAULT_LEVEL"; 105 | 106 | lazy_static! 
{ 107 | pub static ref LEVEL_SCHEMAS: Vec = { 108 | (0..MAX_LEVEL) 109 | .map(|level| edge_schema(DEFAULT_LEVEL_NAME, level)) 110 | .collect() 111 | }; 112 | } 113 | 114 | pub async fn initialize_schemas(morph: &Arc) -> Result<(), String> { 115 | let neb = morph.neb_client.as_ref(); 116 | for level_schema in LEVEL_SCHEMAS.iter() { 117 | let schema_id = level_schema.id; 118 | if neb.schema_client.get(&schema_id).await.unwrap().is_none() { 119 | morph 120 | .schema_container 121 | .new_schema(level_schema.clone()) 122 | .await 123 | .map_err(|e| { 124 | format!( 125 | "Failed to create level {} schema: {:?}", 126 | level_schema.name, e 127 | ) 128 | })?; 129 | } 130 | } 131 | if neb 132 | .schema_client 133 | .get(&HNSW_VERTEX_SCHEMA_ID) 134 | .await 135 | .unwrap() 136 | .is_none() 137 | { 138 | morph 139 | .schema_container 140 | .new_schema(hnsw_vertex_schema()) 141 | .await 142 | .map_err(|e| format!("Failed to create HNSW vertex schema: {:?}", e))?; 143 | } 144 | if neb 145 | .schema_client 146 | .get(&INDEX_SCHEMA_ID) 147 | .await 148 | .unwrap() 149 | .is_none() 150 | { 151 | neb.new_schema_with_id(index_schema()) 152 | .await 153 | .map_err(|e| format!("Failed to create index schema: {:?}", e))? 
154 | .map_err(|e| format!("Failed to create index schema: {:?}", e))?; 155 | } 156 | Ok(()) 157 | } 158 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/service.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::{BTreeMap, HashSet}, 3 | future, mem, 4 | sync::Arc, 5 | }; 6 | 7 | use bifrost::{ 8 | conshash::ConsistentHashing, dispatch_rpc_service_functions, raft::client::RaftClient, service, 9 | service_with_id, 10 | }; 11 | use dovahkiin::types::{Id, OwnedPrimArray}; 12 | use futures::{future::BoxFuture, FutureExt}; 13 | use itertools::Itertools; 14 | use lightning::map::{Map, PtrHashMap}; 15 | use neb::ram::chunk::Chunks; 16 | 17 | use crate::{ 18 | apps::hnsw::measurements::{Metric, MetricEncoding}, 19 | graph::GraphEngine, 20 | job::{ 21 | logger::{append_job_log, JobLogLevel, JobLogger}, 22 | JobId, 23 | }, 24 | traversal::navigation::Distance, 25 | }; 26 | 27 | use super::{ 28 | index::HNSWIndex, 29 | schema::LEVEL_SCHEMAS, 30 | search::{HnswOnlinePartition, PartitionSearch}, 31 | types::OrderedFloat, 32 | }; 33 | 34 | pub use super::service::service::HNSWPartitionService; 35 | 36 | pub const HNSW_PARTITION_SERVICE_ID: u64 = hash_ident!("HNSW_PARTITION_SERVICE"); 37 | 38 | pub mod service { 39 | use crate::{apps::hnsw::measurements::MetricEncoding, job::JobId}; 40 | use bifrost::service; 41 | 42 | use super::*; 43 | 44 | service! 
{ 45 | // For a partition to do anything with a vector, first start a job 46 | rpc new_job(job_id: JobId, schema: u32, field_id: u64, query: OwnedPrimArray, k: u64, ef: u64, ef_construction: u64, metric: MetricEncoding) -> Result<(), String>; 47 | rpc end_job(job_id: JobId) -> bool; 48 | 49 | // Then use multiple iterations (or not) to do the actual search 50 | // It returns remote frontiers to be processed by other partitions 51 | rpc next_iteration(readonly: bool, job_id: JobId) -> Result, String>; 52 | rpc set_frontiers(job_id: JobId, frontiers: Vec) -> Result<(), String>; 53 | 54 | // After a search is completed, it can call the following functions 55 | rpc index_cell(job_id: JobId, cell_id: Id) -> Result<(), String>; 56 | rpc top_k(job_id: JobId, k: u32) -> Result, String>; 57 | rpc top(job_id: JobId) -> Result<(Id, Distance), String>; 58 | 59 | rpc get_last_result(job_id: JobId) -> Result, String>; 60 | 61 | // Misc 62 | rpc new_index(id: Id, name: String, schema: u32, field_id: u64) -> Result<(), String>; 63 | } 64 | 65 | pub struct HNSWPartitionService { 66 | pub partition: HnswOnlinePartition, 67 | pub job_logger: Arc, 68 | pub jobs: PtrHashMap>>, 69 | } 70 | 71 | dispatch_rpc_service_functions!(HNSWPartitionService); 72 | service_with_id!(HNSWPartitionService, HNSW_PARTITION_SERVICE_ID); 73 | } 74 | 75 | impl service::HNSWPartitionService { 76 | pub async fn new( 77 | server_id: u64, 78 | conshash: &Arc, 79 | raft_client: &Arc, 80 | chunks: &Arc, 81 | engine: &Arc, 82 | job_logger: &Arc, 83 | ) -> Result { 84 | let partition = HnswOnlinePartition::new(server_id, conshash, raft_client, chunks, engine) 85 | .await 86 | .map_err(|e| format!("Failed to create HnswOnlinePartition: {:?}", e))?; 87 | Ok(Self { 88 | partition, 89 | job_logger: job_logger.clone(), 90 | jobs: PtrHashMap::with_capacity(32), 91 | }) 92 | } 93 | } 94 | 95 | impl service::Service for service::HNSWPartitionService { 96 | fn new_job<'a>( 97 | &'a self, 98 | job_id: JobId, 99 | schema: u32,
100 | field_id: u64, 101 | query: OwnedPrimArray, 102 | k: u64, 103 | ef: u64, 104 | ef_construction: u64, 105 | metric: MetricEncoding, 106 | ) -> BoxFuture<'a, Result<(), String>> { 107 | async move { 108 | self.partition 109 | .new_search( 110 | schema, 111 | field_id, 112 | query, 113 | k as usize, 114 | ef as usize, 115 | ef_construction as usize, 116 | metric, 117 | ) 118 | .await 119 | .map(|job| { 120 | let job = Arc::new(async_std::sync::Mutex::new(job)); 121 | self.jobs.insert(job_id, job); 122 | }) 123 | .map_err(|e| format!("Failed to create job: {:?}", e)) 124 | } 125 | .boxed() 126 | } 127 | 128 | fn end_job<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, bool> { 129 | let result = self.jobs.remove(&job_id).is_some(); 130 | append_job_log( 131 | &self.job_logger, 132 | job_id, 133 | JobLogLevel::Info, 134 | format!("Job ended {} for {:?}", result, job_id), 135 | ); 136 | future::ready(result).boxed() 137 | } 138 | 139 | fn get_last_result<'a>( 140 | &'a self, 141 | job_id: JobId, 142 | ) -> BoxFuture<'a, Result, String>> { 143 | async move { 144 | let job = match self 145 | .jobs 146 | .get(&job_id) 147 | .ok_or(format!("Job {:?} not found", job_id)) 148 | { 149 | Ok(job) => job, 150 | Err(msg) => { 151 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 152 | return Err(msg); 153 | } 154 | }; 155 | let job = job.lock().await; 156 | Ok(job.history.to_vec()) 157 | } 158 | .boxed() 159 | } 160 | 161 | fn set_frontiers<'a>( 162 | &'a self, 163 | job_id: JobId, 164 | frontiers: Vec, 165 | ) -> BoxFuture<'a, Result<(), String>> { 166 | async move { 167 | let job = match self 168 | .jobs 169 | .get(&job_id) 170 | .ok_or(format!("Job {:?} not found", job_id)) 171 | { 172 | Ok(job) => job, 173 | Err(msg) => { 174 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 175 | return Err(msg); 176 | } 177 | }; 178 | let mut job = job.lock().await; 179 | job.frontier = frontiers; 180 | Ok(()) 181 | } 182 | .boxed() 183 
| } 184 | 185 | fn next_iteration<'a>( 186 | &'a self, 187 | readonly: bool, 188 | job_id: JobId, 189 | ) -> BoxFuture<'a, Result, String>> { 190 | async move { 191 | let job = match self 192 | .jobs 193 | .get(&job_id) 194 | .ok_or(format!("Job {:?} not found", job_id)) 195 | { 196 | Ok(job) => job, 197 | Err(msg) => { 198 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 199 | return Err(msg); 200 | } 201 | }; 202 | let mut job = job.lock().await; 203 | let metric = Metric::from_encoding(job.metric); 204 | let logger = &self.job_logger; 205 | let result = match metric { 206 | Metric::L2(metric) => { 207 | self.partition 208 | .next_iteration(&mut job, metric, readonly, logger, job_id) 209 | .await 210 | } 211 | Metric::Cosine(metric) => { 212 | self.partition 213 | .next_iteration(&mut job, metric, readonly, logger, job_id) 214 | .await 215 | } 216 | Metric::Manhattan(metric) => { 217 | self.partition 218 | .next_iteration(&mut job, metric, readonly, logger, job_id) 219 | .await 220 | } 221 | Metric::Chebyshev(metric) => { 222 | self.partition 223 | .next_iteration(&mut job, metric, readonly, logger, job_id) 224 | .await 225 | } 226 | }; 227 | match result { 228 | Ok(_) => { 229 | job.last_distance = job.metadata.last_distance; 230 | job.metadata.last_distance = Distance::INFINITY; 231 | Ok(mem::take(&mut job.metadata.remote_frontiers)) 232 | } 233 | Err(e) => { 234 | let msg = format!("Failed to run next iteration: {:?}", e); 235 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 236 | Err(msg) 237 | } 238 | } 239 | } 240 | .boxed() 241 | } 242 | 243 | fn index_cell<'a>(&'a self, job_id: JobId, cell_id: Id) -> BoxFuture<'a, Result<(), String>> { 244 | async move { 245 | self.new_vertex(job_id, cell_id) 246 | .await 247 | .map_err(|e| format!("Failed to index cell: {:?}", e)) 248 | .map(|_| ()) 249 | } 250 | .boxed() 251 | } 252 | 253 | fn top_k<'a>( 254 | &'a self, 255 | job_id: JobId, 256 | k: u32, 257 | ) -> 
BoxFuture<'a, Result, String>> { 258 | async move { 259 | let results = self.bfs_search(job_id, 0, k).await?; 260 | Ok(results 261 | .into_iter() 262 | .map(|(_vid, id, distance)| (id, distance)) 263 | .collect()) 264 | } 265 | .boxed() 266 | } 267 | 268 | fn top<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, Result<(Id, Distance), String>> { 269 | async move { 270 | let search = match self 271 | .jobs 272 | .get(&job_id) 273 | .ok_or(format!("Job {:?} not found", job_id)) 274 | { 275 | Ok(job) => job, 276 | Err(msg) => { 277 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 278 | return Err(msg); 279 | } 280 | }; 281 | let search = search.lock().await; 282 | let result = search 283 | .history 284 | .iter() 285 | .sorted_by_key(|(_, distance)| OrderedFloat(*distance)) 286 | .next() 287 | .ok_or("No results found")?; 288 | Ok(result.clone()) 289 | } 290 | .boxed() 291 | } 292 | 293 | fn new_index<'a>( 294 | &'a self, 295 | id: Id, 296 | name: String, 297 | schema: u32, 298 | field_id: u64, 299 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 300 | let index = HNSWIndex::new(id, name, schema, field_id); 301 | self.partition.indices.insert( 302 | (schema, field_id), 303 | Arc::new(async_std::sync::RwLock::new(index)), 304 | ); 305 | future::ready(Ok(())).boxed() 306 | } 307 | } 308 | 309 | // Partition coordination 310 | impl HNSWPartitionService { 311 | async fn bfs_search<'a>( 312 | &'a self, 313 | job_id: JobId, 314 | level: u32, 315 | k: u32, 316 | ) -> Result, String> { 317 | let job = match self 318 | .jobs 319 | .get(&job_id) 320 | .ok_or(format!("Job {:?} not found", job_id)) 321 | { 322 | Ok(job) => job, 323 | Err(msg) => { 324 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 325 | return Err(msg); 326 | } 327 | }; 328 | let job = job.lock().await; 329 | let level_schema = LEVEL_SCHEMAS[level as usize].id; 330 | let field_id = job.field_id; 331 | let metric = job.metric; 332 | append_job_log( 
333 | &self.job_logger, 334 | job_id, 335 | JobLogLevel::Info, 336 | format!("Starting BFS search for job {:?}", job_id), 337 | ); 338 | match self 339 | .partition 340 | .search_top_k( 341 | level_schema, 342 | field_id, 343 | k as usize, 344 | metric, 345 | &job.metadata, 346 | &self.job_logger, 347 | job_id, 348 | ) 349 | .await 350 | { 351 | Ok(results) => { 352 | append_job_log( 353 | &self.job_logger, 354 | job_id, 355 | JobLogLevel::Info, 356 | format!( 357 | "BFS search completed for job {:?} with {} results", 358 | job_id, 359 | results.len() 360 | ), 361 | ); 362 | Ok(results) 363 | } 364 | Err(e) => { 365 | let msg = format!("BFS search failed: {:?}", e); 366 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 367 | Err(msg) 368 | } 369 | } 370 | } 371 | 372 | async fn new_vertex<'a>(&'a self, job_id: JobId, cell_id: Id) -> Result { 373 | let job = match self 374 | .jobs 375 | .get(&job_id) 376 | .ok_or(format!("Job {:?} not found", job_id)) 377 | { 378 | Ok(job) => job, 379 | Err(msg) => { 380 | append_job_log(&self.job_logger, job_id, JobLogLevel::Error, msg.clone()); 381 | return Err(msg); 382 | } 383 | }; 384 | let job = job.lock().await; 385 | debug_assert_eq!( 386 | self.partition.conshash.get_server_id(cell_id.higher), 387 | Some(self.partition.server_id) 388 | ); 389 | let metric = Metric::from_encoding(job.metric); 390 | let max_level = job.max_level; 391 | let result = match metric { 392 | Metric::L2(metric) => self 393 | .partition 394 | .new_vertex( 395 | job_id, 396 | cell_id, 397 | job.schema, 398 | job.field_id, 399 | metric, 400 | &job.metadata, 401 | max_level, 402 | &self.job_logger, 403 | ) 404 | .await 405 | .map_err(|e| format!("Failed to create vertex: {:?}", e)), 406 | Metric::Cosine(metric) => self 407 | .partition 408 | .new_vertex( 409 | job_id, 410 | cell_id, 411 | job.schema, 412 | job.field_id, 413 | metric, 414 | &job.metadata, 415 | max_level, 416 | &self.job_logger, 417 | ) 418 | .await 419 | 
.map_err(|e| format!("Failed to create vertex: {:?}", e)), 420 | Metric::Chebyshev(metric) => self 421 | .partition 422 | .new_vertex( 423 | job_id, 424 | cell_id, 425 | job.schema, 426 | job.field_id, 427 | metric, 428 | &job.metadata, 429 | max_level, 430 | &self.job_logger, 431 | ) 432 | .await 433 | .map_err(|e| format!("Failed to create vertex: {:?}", e)), 434 | Metric::Manhattan(metric) => self 435 | .partition 436 | .new_vertex( 437 | job_id, 438 | cell_id, 439 | job.schema, 440 | job.field_id, 441 | metric, 442 | &job.metadata, 443 | max_level, 444 | &self.job_logger, 445 | ) 446 | .await 447 | .map_err(|e| format!("Failed to create vertex: {:?}", e)), 448 | }; 449 | result 450 | } 451 | } 452 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/types.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::collections::{BTreeMap, HashSet}; 3 | use std::rc::Rc; 4 | 5 | use ahash::HashMap; 6 | use bifrost::rpc::RPCError; 7 | use dovahkiin::types::{Id, OwnedPrimArray}; 8 | use neb::ram::cell::ReadError; 9 | use neb::{client::transaction::TxnError, ram::cell::WriteError}; 10 | 11 | use crate::graph::{edge::EdgeError, NeighbourhoodError}; 12 | use crate::traversal::navigation::Distance; 13 | use crate::utils::ring_buffer::RingBuffer; 14 | 15 | /// A wrapper for f32 that implements Ord by using partial_cmp and treating NaN values as equal 16 | #[derive(Debug, Copy, Clone, PartialEq, PartialOrd)] 17 | pub struct OrderedFloat(pub f32); 18 | 19 | impl Eq for OrderedFloat {} 20 | 21 | impl Ord for OrderedFloat { 22 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 23 | self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Equal) 24 | } 25 | } 26 | 27 | pub struct ReverseOrd(pub T); 28 | impl Ord for ReverseOrd { 29 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 30 | other.0.cmp(&self.0) // Reversed comparison 31 | } 32 | } 33 | impl 
PartialOrd for ReverseOrd { 34 | fn partial_cmp(&self, other: &Self) -> Option { 35 | Some(self.cmp(other)) 36 | } 37 | } 38 | impl PartialEq for ReverseOrd { 39 | fn eq(&self, other: &Self) -> bool { 40 | self.0 == other.0 41 | } 42 | } 43 | impl Eq for ReverseOrd {} 44 | 45 | #[derive(Debug)] 46 | pub enum HNSWIndexError { 47 | RPCError(RPCError), 48 | IndexFormatError(String), 49 | IndexNotFound, 50 | EdgeError(EdgeError), 51 | TxnError(TxnError), 52 | NeighbourhoodError(NeighbourhoodError), 53 | WriteError(WriteError), 54 | ReadError(ReadError), 55 | UpdateError(TxnError), 56 | } 57 | 58 | pub struct SearchMetadata { 59 | pub level: usize, 60 | pub visited: HashSet, 61 | pub history: RingBuffer<(Id, Distance)>, 62 | pub level_entries: ahash::HashMap, 63 | pub remote_frontiers: HashSet, 64 | pub last_distance: Distance, 65 | pub vertex_distance_cache: RefCell>, 66 | pub vertex_cache: RefCell>, 67 | pub vertex_vector_cache: RefCell>>, 68 | pub ef: usize, // Extension factor for search, controls exploration vs. 
exploitation 69 | pub ef_construction: usize, // Extension factor for construction, controls how many neighbors to consider when building the graph 70 | } 71 | 72 | unsafe impl Sync for SearchMetadata {} 73 | unsafe impl Send for SearchMetadata {} 74 | 75 | impl SearchMetadata { 76 | pub fn set_query_vector(&self, query: OwnedPrimArray) { 77 | self.vertex_vector_cache 78 | .borrow_mut() 79 | .insert(Id::unit_id(), Rc::new(query)); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/apps/hnsw/partition/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::apps::hnsw::partition::types::HNSWIndexError; 2 | use crate::graph::partitioner::vector::FromVectorPartitioner; 3 | use crate::traversal::navigation::Distance; 4 | use crate::utils::ring_buffer::RingBuffer; 5 | use dovahkiin::types::Id; 6 | use neb::ram::cell::OwnedCell; 7 | use rand::Rng; 8 | 9 | /// Generate a level based on geometric distribution with P = 1/M 10 | /// This matches standard HNSW behavior. 
11 | pub fn generate_random_level(prob: f64, max_level_cap: usize) -> usize { 12 | let mut rng = rand::rng(); 13 | let mut level = 0; 14 | while rng.random::() < prob && level < max_level_cap { 15 | level += 1; 16 | } 17 | level 18 | } 19 | 20 | // Helper function to determine the maximum connections for a level 21 | pub fn get_max_connections_for_level(level: usize) -> usize { 22 | if level == 0 { 23 | // More connections at the lowest level 24 | 16 25 | } else { 26 | // Fewer connections at higher levels 27 | 8 28 | } 29 | } 30 | 31 | pub fn merge_history( 32 | history: &RingBuffer<(Id, Distance)>, 33 | new_history: &RingBuffer<(Id, Distance)>, 34 | ) -> RingBuffer<(Id, Distance)> { 35 | history.merge_sorted(new_history, |a, b| b.1.partial_cmp(&a.1).unwrap()) 36 | } 37 | 38 | pub fn data_partition_key(field_id: u64, cell: &OwnedCell) -> Result { 39 | let partitioner = FromVectorPartitioner::new(field_id); 40 | let key = partitioner.vertex_partition_key(cell); 41 | if key == 0 { 42 | Err(HNSWIndexError::IndexFormatError(format!( 43 | "Index {:?} has no partition key", 44 | cell 45 | ))) 46 | } else { 47 | Ok(key) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/apps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod hnsw; 2 | -------------------------------------------------------------------------------- /src/config/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{server::MorphesOptions, utils::file::slurp}; 2 | use serde_yaml; 3 | 4 | pub fn options_from_file<'a>(file: &'a str) -> MorphesOptions { 5 | let file_text = slurp(file).unwrap(); 6 | let mut config: MorphesOptions = serde_yaml::from_str(&file_text).unwrap(); 7 | config.storage.memory_size *= 1024 * 1024; 8 | return config; 9 | } 10 | -------------------------------------------------------------------------------- /src/graph/edge/bilateral.rs: 
-------------------------------------------------------------------------------- 1 | use dovahkiin::types::{Map, OwnedMap, OwnedValue}; 2 | use futures::future::BoxFuture; 3 | use futures::{FutureExt, TryFutureExt}; 4 | use neb::client::transaction::{Transaction, TxnError}; 5 | use neb::ram::cell::OwnedCell; 6 | use neb::ram::types::Id; 7 | use std::future; 8 | use std::sync::Arc; 9 | 10 | use super::super::id_list::IdList; 11 | use super::{EdgeAttributes, EdgeError, TEdge}; 12 | use crate::server::schema::{GraphSchema, SchemaContainer}; 13 | 14 | use rand::prelude::*; 15 | 16 | pub trait BilateralEdge: TEdge + Sync + Send { 17 | fn vertex_a_field() -> u64; 18 | fn vertex_b_field() -> u64; 19 | 20 | fn vertex_a(&self) -> &Id; 21 | fn vertex_b(&self) -> &Id; 22 | 23 | fn edge_a_field() -> u64; 24 | fn edge_b_field() -> u64; 25 | 26 | fn build_edge(a_field: Id, b_field: Id, schema_id: u32, cell: Option) -> Self::Edge; 27 | fn edge_cell(&self) -> &Option; 28 | fn schema_id(&self) -> u32; 29 | 30 | fn into_cell(self) -> Option; 31 | fn edge_cell_mut(&mut self) -> Option<&mut OwnedCell>; 32 | 33 | fn from_id<'a>( 34 | vertex_id: Id, 35 | edge_attrs: &'a EdgeAttributes, 36 | edge_schema_id: u32, 37 | txn: &'a Transaction, 38 | id: Id, 39 | ) -> BoxFuture<'a, Result, TxnError>> { 40 | // println!("getting edge from id: {:?}, schema_id: {:?}", id, schema_id); 41 | if edge_attrs.has_body { 42 | return txn 43 | .read(id) 44 | .map_ok(move |trace_cell| { 45 | let trace_cell = match trace_cell { 46 | Some(cell) => cell, 47 | None => return Err(EdgeError::CellNotFound), 48 | }; 49 | let mut a_id = Id::unit_id(); 50 | let mut b_id = Id::unit_id(); 51 | let edge_cell = if edge_attrs.edge_type == Self::edge_type() { 52 | if let (&OwnedValue::Id(e_a_id), &OwnedValue::Id(e_b_id)) = ( 53 | &trace_cell.data[Self::edge_a_field()], 54 | &trace_cell.data[Self::edge_b_field()], 55 | ) { 56 | a_id = e_a_id; 57 | b_id = e_b_id; 58 | } 59 | Some(trace_cell) 60 | } else { 61 | return 
Err(EdgeError::WrongEdgeType); 62 | }; 63 | Ok(Self::build_edge(a_id, b_id, edge_schema_id, edge_cell)) 64 | }) 65 | .boxed(); 66 | } else { 67 | let a_id = vertex_id; 68 | let b_id = id; 69 | let res = Ok(Ok(Self::build_edge(a_id, b_id, edge_schema_id, None))); 70 | future::ready(res).boxed() 71 | } 72 | } 73 | 74 | fn link<'a>( 75 | vertex_a_id: Id, 76 | vertex_b_id: Id, 77 | body: &'a Option, 78 | txn: &'a Transaction, 79 | schema_id: u32, 80 | edge_attrs: &'a EdgeAttributes, 81 | ) -> BoxFuture<'a, Result, TxnError>> { 82 | async move { 83 | let vertex_a_pointer; 84 | let vertex_b_pointer; 85 | let edge_cell = { 86 | if edge_attrs.edge_type != Self::edge_type() { 87 | return Ok(Err(EdgeError::WrongEdgeType)); 88 | } 89 | if edge_attrs.has_body { 90 | if let Some(body_map) = body { 91 | let edge_id_lower = { 92 | let mut rng = rand::rng(); 93 | rng.next_u64() 94 | }; 95 | let mut edge_body_cell = OwnedCell::new_with_id( 96 | schema_id, 97 | &Id::new(vertex_a_id.higher, edge_id_lower), 98 | OwnedValue::Map(body_map.owned()), 99 | ); 100 | let edge_body_id = edge_body_cell.id(); 101 | edge_body_cell.data[Self::edge_a_field()] = OwnedValue::Id(vertex_a_id); 102 | edge_body_cell.data[Self::edge_b_field()] = OwnedValue::Id(vertex_b_id); 103 | txn.write(edge_body_cell.clone()).await?; 104 | vertex_a_pointer = edge_body_id; 105 | vertex_b_pointer = edge_body_id; 106 | Some(edge_body_cell) 107 | } else { 108 | return Ok(Err(EdgeError::NormalEdgeShouldHaveBody)); 109 | } 110 | } else { 111 | if body.is_none() { 112 | vertex_a_pointer = vertex_b_id; 113 | vertex_b_pointer = vertex_a_id; 114 | None 115 | } else { 116 | return Ok(Err(EdgeError::SimpleEdgeShouldNotHaveBody)); 117 | } 118 | } 119 | }; 120 | // Add vertex_a_pointer to vertex_a's id list 121 | let a_result = 122 | IdList::from_txn_and_container(txn, vertex_a_id, Self::vertex_a_field(), schema_id) 123 | .add(&vertex_a_pointer) 124 | .await? 
125 | .map_err(EdgeError::IdListError); 126 | if let Err(e) = a_result { 127 | return Ok(Err(e)); 128 | } 129 | 130 | // Add vertex_b_pointer to vertex_b's id list 131 | let b_result = 132 | IdList::from_txn_and_container(txn, vertex_b_id, Self::vertex_b_field(), schema_id) 133 | .add(&vertex_b_pointer) 134 | .await? 135 | .map_err(EdgeError::IdListError); 136 | if let Err(e) = b_result { 137 | return Ok(Err(e)); 138 | } 139 | 140 | Ok(Ok(Self::build_edge( 141 | vertex_a_id, 142 | vertex_b_id, 143 | schema_id, 144 | edge_cell, 145 | ))) 146 | } 147 | .boxed() 148 | } 149 | 150 | fn remove<'a>( 151 | &'a self, 152 | txn: &'a Transaction, 153 | ) -> BoxFuture<'a, Result, TxnError>> { 154 | async move { 155 | let (v_a_removal, v_b_removal) = match self.edge_cell() { 156 | &Some(ref cell) => { 157 | txn.remove(cell.id()).await?; 158 | (cell.id(), cell.id()) 159 | } 160 | &None => (*self.vertex_b(), *self.vertex_a()), 161 | }; 162 | match IdList::from_txn_and_container( 163 | txn, 164 | *self.vertex_a(), 165 | Self::vertex_a_field(), 166 | self.schema_id(), 167 | ) 168 | .remove(&v_a_removal, false) 169 | .await? 170 | .map_err(EdgeError::IdListError) 171 | { 172 | Err(e) => return Ok(Err(e)), 173 | _ => {} 174 | } 175 | match IdList::from_txn_and_container( 176 | txn, 177 | *self.vertex_b(), 178 | Self::vertex_b_field(), 179 | self.schema_id(), 180 | ) 181 | .remove(&v_b_removal, false) 182 | .await? 
183 | .map_err(EdgeError::IdListError) 184 | { 185 | Err(e) => return Ok(Err(e)), 186 | _ => {} 187 | } 188 | Ok(Ok(())) 189 | } 190 | .boxed() 191 | } 192 | 193 | fn oppisite_vertex_id(&self, vertex_id: &Id) -> Option<&Id> { 194 | let v1_id = self.vertex_a(); 195 | let v2_id = self.vertex_b(); 196 | if v1_id == vertex_id { 197 | Some(v2_id) 198 | } else if v2_id == vertex_id { 199 | Some(v1_id) 200 | } else { 201 | None 202 | } 203 | } 204 | 205 | fn vertex_ids(&self) -> (Id, Id) { 206 | (*self.vertex_a(), *self.vertex_b()) 207 | } 208 | 209 | fn cell_id(&self) -> Option { 210 | self.edge_cell().as_ref().map(|cell| cell.id()) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/graph/edge/directed.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::Type; 2 | use neb::ram::cell::OwnedCell; 3 | use neb::ram::schema::Field; 4 | use neb::ram::types::Id; 5 | use neb::ram::types::OwnedValue; 6 | 7 | use super::bilateral::BilateralEdge; 8 | use super::{EdgeType, TEdge}; 9 | use crate::graph::fields::*; 10 | 11 | lazy_static! 
{
    pub static ref EDGE_TEMPLATE: Vec<Field> = vec![
        Field::new_unindexed(&*INBOUND_NAME, Type::Id),
        Field::new_unindexed(&*OUTBOUND_NAME, Type::Id),
    ];
}

/// A directed edge between two vertices, optionally backed by a body cell.
// NOTE(review): the mapping looks inverted — `vertex_a_field` is OUTBOUND yet
// `vertex_a()` returns `inbound_id` (and vice versa). It is internally
// consistent with how `build_edge` assigns its arguments, so behavior is
// preserved here, but the field names deserve a second look — TODO confirm.
#[derive(Debug)]
pub struct DirectedEdge {
    inbound_id: Id,
    outbound_id: Id,
    schema_id: u32,
    pub cell: Option<OwnedCell>,
}

impl TEdge for DirectedEdge {
    type Edge = DirectedEdge;
    fn edge_type() -> EdgeType {
        EdgeType::Directed
    }
}

impl BilateralEdge for DirectedEdge {
    fn vertex_a_field() -> u64 {
        *OUTBOUND_KEY_ID
    }

    fn vertex_b_field() -> u64 {
        *INBOUND_KEY_ID
    }

    fn vertex_a(&self) -> &Id {
        &self.inbound_id
    }

    fn vertex_b(&self) -> &Id {
        &self.outbound_id
    }

    fn edge_a_field() -> u64 {
        *INBOUND_KEY_ID
    }

    fn edge_b_field() -> u64 {
        *OUTBOUND_KEY_ID
    }

    fn build_edge(a_field: Id, b_field: Id, schema_id: u32, cell: Option<OwnedCell>) -> Self::Edge {
        DirectedEdge {
            inbound_id: a_field,
            outbound_id: b_field,
            schema_id,
            cell,
        }
    }

    fn edge_cell(&self) -> &Option<OwnedCell> {
        &self.cell
    }

    fn edge_cell_mut(&mut self) -> Option<&mut OwnedCell> {
        self.cell.as_mut()
    }

    fn schema_id(&self) -> u32 {
        self.schema_id
    }

    fn into_cell(self) -> Option<OwnedCell> {
        self.cell
    }
}

/// Placeholder for a directed hyper-edge (many-to-many); not yet wired in.
pub struct DirectedHyperEdge {
    inbound_ids: Vec<Id>,
    outbound_ids: Vec<Id>,
    cell: OwnedCell,
}

edge_index!(DirectedEdge);
--------------------------------------------------------------------------------
/src/graph/edge/hyper.rs:
--------------------------------------------------------------------------------
use neb::ram::schema::Field;
use neb::ram::types::Type;

lazy_static!
{
    pub static ref EDGE_TEMPLATE: Vec<Field> =
        vec![Field::new_unindexed_array("_vertices", Type::Id,)];
}
--------------------------------------------------------------------------------
/src/graph/edge/macros.rs:
--------------------------------------------------------------------------------
/// Implements `Index`/`IndexMut` (by field id and by field name) for an edge
/// type with an `Option<OwnedCell>` field named `cell`. Reads on a body-less
/// edge yield `OwnedValue::Null`; writes on a body-less edge panic.
#[macro_export]
macro_rules! edge_index {
    ($struc: ident) => {
        use std::ops::{Index, IndexMut};
        impl Index<u64> for $struc {
            type Output = OwnedValue;
            fn index(&self, index: u64) -> &Self::Output {
                if let Some(ref cell) = self.cell {
                    &cell[index]
                } else {
                    &OwnedValue::Null
                }
            }
        }

        impl<'a> Index<&'a str> for $struc {
            type Output = OwnedValue;
            fn index(&self, index: &'a str) -> &Self::Output {
                if let Some(ref cell) = self.cell {
                    &cell[index]
                } else {
                    &OwnedValue::Null
                }
            }
        }

        impl<'a> IndexMut<&'a str> for $struc {
            fn index_mut(&mut self, index: &'a str) -> &mut Self::Output {
                if let &mut Some(ref mut cell) = &mut self.cell {
                    &mut cell[index]
                } else {
                    panic!("this edge have no cell");
                }
            }
        }

        impl IndexMut<u64> for $struc {
            fn index_mut(&mut self, index: u64) -> &mut Self::Output {
                if let &mut Some(ref mut cell) = &mut self.cell {
                    &mut cell[index]
                } else {
                    panic!("this edge have no cell");
                }
            }
        }
    };
}
--------------------------------------------------------------------------------
/src/graph/edge/mod.rs:
--------------------------------------------------------------------------------
#[macro_use]
mod macros;

pub mod bilateral;
pub mod directed;
pub mod hyper;
pub mod undirectd;

use super::id_list::IdListError;
use crate::graph::edge::bilateral::BilateralEdge;
use crate::server::schema::{GraphSchema, SchemaContainer};
use dovahkiin::types::OwnedValue;
use neb::client::transaction::{Transaction, TxnError};
14 | use neb::ram::cell::OwnedCell; 15 | use neb::ram::types::Id; 16 | use std::ops::{Index, IndexMut}; 17 | use std::sync::Arc; 18 | 19 | #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone, Copy)] 20 | pub enum EdgeType { 21 | Directed, 22 | Undirected, 23 | } 24 | 25 | #[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Clone, Copy)] 26 | pub struct EdgeAttributes { 27 | pub edge_type: EdgeType, 28 | pub has_body: bool, 29 | } 30 | 31 | impl EdgeAttributes { 32 | pub fn new(edge_type: EdgeType, has_body: bool) -> EdgeAttributes { 33 | EdgeAttributes { 34 | edge_type: edge_type, 35 | has_body: has_body, 36 | } 37 | } 38 | } 39 | 40 | #[derive(Debug, Serialize, Deserialize)] 41 | pub enum EdgeError { 42 | WrongSchema, 43 | CannotFindSchema, 44 | CellNotFound, 45 | WrongVertexField, 46 | WrongEdgeType, 47 | IdListError(IdListError), 48 | SimpleEdgeShouldNotHaveBody, 49 | NormalEdgeShouldHaveBody, 50 | FilterEvalError(String), 51 | } 52 | 53 | pub trait TEdge: Send + Sync { 54 | type Edge: TEdge + 'static; 55 | fn edge_type() -> EdgeType; 56 | } 57 | 58 | #[derive(Debug)] 59 | pub enum Edge { 60 | Directed(directed::DirectedEdge), 61 | Undirected(undirectd::UndirectedEdge), 62 | } 63 | 64 | impl Edge { 65 | pub async fn remove(&self, txn: &Transaction) -> Result, TxnError> { 66 | match self { 67 | Edge::Directed(e) => e.remove(txn).await, 68 | Edge::Undirected(e) => e.remove(txn).await, 69 | } 70 | } 71 | 72 | pub async fn get_data(&self) -> &Option { 73 | match self { 74 | &Edge::Directed(ref e) => e.edge_cell(), 75 | &Edge::Undirected(ref e) => e.edge_cell(), 76 | } 77 | } 78 | 79 | pub async fn get_data_mut(&mut self) -> Option<&mut OwnedCell> { 80 | match self { 81 | &mut Edge::Directed(ref mut e) => e.edge_cell_mut(), 82 | &mut Edge::Undirected(ref mut e) => e.edge_cell_mut(), 83 | } 84 | } 85 | 86 | pub fn vertex_id_opposite(&self, vertex_id: &Id) -> Option<&Id> { 87 | match self { 88 | &Edge::Directed(ref e) => e.oppisite_vertex_id(vertex_id), 89 
| &Edge::Undirected(ref e) => e.oppisite_vertex_id(vertex_id), 90 | } 91 | } 92 | pub fn vertex_ids(&self) -> (Id, Id) { 93 | match self { 94 | &Edge::Directed(ref e) => e.vertex_ids(), 95 | &Edge::Undirected(ref e) => e.vertex_ids(), 96 | } 97 | } 98 | pub fn cell_id(&self) -> Option { 99 | match self { 100 | &Edge::Directed(ref e) => e.cell_id(), 101 | &Edge::Undirected(ref e) => e.cell_id(), 102 | } 103 | } 104 | 105 | pub fn edge_type(&self) -> EdgeType { 106 | match self { 107 | &Edge::Directed(_) => EdgeType::Directed, 108 | &Edge::Undirected(_) => EdgeType::Undirected, 109 | } 110 | } 111 | 112 | pub fn into_cell(self) -> Option { 113 | match self { 114 | Edge::Directed(e) => e.into_cell(), 115 | Edge::Undirected(e) => e.into_cell(), 116 | } 117 | } 118 | } 119 | 120 | impl Index for Edge { 121 | type Output = OwnedValue; 122 | fn index(&self, index: u64) -> &Self::Output { 123 | match self { 124 | &Edge::Directed(ref e) => &e[index], 125 | &Edge::Undirected(ref e) => &e[index], 126 | } 127 | } 128 | } 129 | 130 | impl<'a> Index<&'a str> for Edge { 131 | type Output = OwnedValue; 132 | fn index(&self, index: &'a str) -> &Self::Output { 133 | match self { 134 | &Edge::Directed(ref e) => &e[index], 135 | &Edge::Undirected(ref e) => &e[index], 136 | } 137 | } 138 | } 139 | 140 | impl<'a> IndexMut<&'a str> for Edge { 141 | fn index_mut(&mut self, index: &'a str) -> &mut Self::Output { 142 | match self { 143 | &mut Edge::Directed(ref mut e) => &mut e[index], 144 | &mut Edge::Undirected(ref mut e) => &mut e[index], 145 | } 146 | } 147 | } 148 | 149 | impl IndexMut for Edge { 150 | fn index_mut(&mut self, index: u64) -> &mut Self::Output { 151 | match self { 152 | &mut Edge::Directed(ref mut e) => &mut e[index], 153 | &mut Edge::Undirected(ref mut e) => &mut e[index], 154 | } 155 | } 156 | } 157 | 158 | pub async fn from_id( 159 | vertex_id: Id, 160 | edge_attrs: &EdgeAttributes, 161 | edge_schema_id: u32, 162 | txn: &Transaction, 163 | id: Id, 164 | ) -> Result, 
TxnError> { 165 | match edge_attrs.edge_type { 166 | EdgeType::Directed => { 167 | directed::DirectedEdge::from_id(vertex_id, edge_attrs, edge_schema_id, txn, id) 168 | .await 169 | .map(|r| r.map(Edge::Directed)) 170 | } 171 | EdgeType::Undirected => { 172 | undirectd::UndirectedEdge::from_id(vertex_id, edge_attrs, edge_schema_id, txn, id) 173 | .await 174 | .map(|r| r.map(Edge::Undirected)) 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/graph/edge/undirectd.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::Type; 2 | use neb::ram::cell::OwnedCell; 3 | use neb::ram::schema::Field; 4 | use neb::ram::types::OwnedValue; 5 | use neb::ram::types::{key_hash, Id}; 6 | 7 | use super::bilateral::BilateralEdge; 8 | use super::{EdgeType, TEdge}; 9 | use crate::graph::fields::*; 10 | 11 | lazy_static! { 12 | pub static ref EDGE_VERTEX_A_NAME: String = String::from("_vertex_a"); 13 | pub static ref EDGE_VERTEX_B_NAME: String = String::from("_vertex_b"); 14 | pub static ref EDGE_TEMPLATE: Vec = vec![ 15 | Field::new_unindexed(&*EDGE_VERTEX_A_NAME, Type::Id), 16 | Field::new_unindexed(&*EDGE_VERTEX_B_NAME, Type::Id), 17 | ]; 18 | pub static ref EDGE_VERTEX_A_ID: u64 = key_hash(&*EDGE_VERTEX_A_NAME); 19 | pub static ref EDGE_VERTEX_B_ID: u64 = key_hash(&*EDGE_VERTEX_B_NAME); 20 | } 21 | 22 | #[derive(Debug)] 23 | pub struct UndirectedEdge { 24 | vertex_a_id: Id, 25 | vertex_b_id: Id, 26 | schema_id: u32, 27 | cell: Option, 28 | } 29 | 30 | impl TEdge for UndirectedEdge { 31 | type Edge = UndirectedEdge; 32 | fn edge_type() -> EdgeType { 33 | EdgeType::Undirected 34 | } 35 | } 36 | 37 | impl BilateralEdge for UndirectedEdge { 38 | fn vertex_a_field() -> u64 { 39 | *UNDIRECTED_KEY_ID 40 | } 41 | 42 | fn vertex_b_field() -> u64 { 43 | *UNDIRECTED_KEY_ID 44 | } 45 | 46 | fn vertex_a(&self) -> &Id { 47 | &self.vertex_a_id 48 | } 49 | 50 | fn vertex_b(&self) 
-> &Id { 51 | &self.vertex_b_id 52 | } 53 | 54 | fn edge_a_field() -> u64 { 55 | *EDGE_VERTEX_A_ID 56 | } 57 | 58 | fn edge_b_field() -> u64 { 59 | *EDGE_VERTEX_B_ID 60 | } 61 | 62 | fn build_edge(a_field: Id, b_field: Id, schema_id: u32, cell: Option) -> Self::Edge { 63 | UndirectedEdge { 64 | vertex_a_id: a_field, 65 | vertex_b_id: b_field, 66 | schema_id: schema_id, 67 | cell: cell, 68 | } 69 | } 70 | 71 | fn edge_cell(&self) -> &Option { 72 | &self.cell 73 | } 74 | fn schema_id(&self) -> u32 { 75 | self.schema_id 76 | } 77 | 78 | fn into_cell(self) -> Option { 79 | self.cell 80 | } 81 | 82 | fn edge_cell_mut(&mut self) -> Option<&mut OwnedCell> { 83 | self.cell.as_mut() 84 | } 85 | } 86 | 87 | edge_index!(UndirectedEdge); 88 | -------------------------------------------------------------------------------- /src/graph/fields.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::types::Type; 2 | use neb::ram::schema::Field; 3 | use neb::ram::types::key_hash; 4 | 5 | pub const INBOUND_KEY: &'static str = "_inbound"; 6 | pub const OUTBOUND_KEY: &'static str = "_outbound"; 7 | pub const UNDIRECTED_KEY: &'static str = "_undirected"; 8 | 9 | lazy_static! 
{
    pub static ref INBOUND_NAME: String = String::from(INBOUND_KEY);
    pub static ref OUTBOUND_NAME: String = String::from(OUTBOUND_KEY);
    pub static ref UNDIRECTED_NAME: String = String::from(UNDIRECTED_KEY);
    pub static ref VERTEX_TEMPLATE: Vec<Field> = vec![
        Field::new_unindexed(&*OUTBOUND_NAME, Type::Id),
        Field::new_unindexed(&*INBOUND_NAME, Type::Id),
        Field::new_unindexed(&*UNDIRECTED_NAME, Type::Id),
    ];
    pub static ref INBOUND_KEY_ID: u64 = key_hash(&*INBOUND_NAME);
    pub static ref OUTBOUND_KEY_ID: u64 = key_hash(&*OUTBOUND_NAME);
    pub static ref UNDIRECTED_KEY_ID: u64 = key_hash(&*UNDIRECTED_NAME);
}
--------------------------------------------------------------------------------
/src/graph/partitioner/mod.rs:
--------------------------------------------------------------------------------
use ahash::AHashMap;
use std::{collections::HashMap, collections::HashSet, hash::Hasher};

use dovahkiin::{
    ahash::AHasher,
    types::{Id, OwnedValue},
};
use neb::ram::types::{RandId, RandValue};

use super::{local::LocalGraph, vertex::Vertex};

pub mod vector;
#[cfg(test)]
mod vector_test;

use vector::*;

/// Trait for determining the partition key for vertices.
/// The partition key is stored in the `higher` half of an `Id`; a value of 0
/// is treated throughout as "not yet partitioned".
pub trait Partitioner: Clone {
    /// Calculate a partition key based on one or two vertices
    fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64;

    /// Assign partition keys to all unpartitioned (higher == 0) vertices and
    /// edges of an in-memory graph, in place.
    fn partition_local_graph(&self, graph: &mut LocalGraph);
}

/// Partitions based on copying the partition key from an adjacent (neighbor) vertex
#[derive(Clone, Copy)]
pub struct SameAsNeighbourPartitioner;

impl Partitioner for SameAsNeighbourPartitioner {
    fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64 {
        v2.map_or(0, |v| v.id().higher)
    }

    fn partition_local_graph(&self, graph: &mut LocalGraph) {
        if graph.vertices().is_empty() {
            return;
        }
        let mut counter = AHashMap::new();
        let mut vertex_updates = AHashMap::new();

        // Count partitions and identify vertices that need repartitioning
        for (vid, _) in graph.vertices() {
            let partition = vid.higher;
            if partition == 0 {
                vertex_updates.insert(*vid, 0u64); // Placeholder, will be updated with actual partition
                continue;
            }
            *counter.entry(partition).or_insert(0) += 1;
        }

        // Determine the target partition (most common or random if none)
        let partition = if !counter.is_empty() {
            *counter.iter().max_by_key(|(_, count)| *count).unwrap().0
        } else {
            Id::rand_lower().higher
        };

        // Update the placeholder values with the actual partition
        for (_, part) in vertex_updates.iter_mut() {
            *part = partition;
        }

        // Prepare edge updates
        let mut edge_updates = AHashMap::new();
        for (edge_id, edge) in graph.edges() {
            if edge_id.higher == 0 {
                // Assign edges to the same partition as their vertices
                edge_updates.insert(*edge_id, edge.vertex_a.higher);
            }
        }

        // Apply all updates at once using the update_ids method
        graph.update_ids(&vertex_updates, &edge_updates);
    }
}

/// Partitions based on a specific field from the vertex
#[derive(Clone, Copy)]
pub struct FromFieldPartitioner {
    field_id: u64,
}

impl FromFieldPartitioner {
    pub fn new(field_id: u64) -> Self {
        Self { field_id }
    }
}

impl Partitioner for FromFieldPartitioner {
    fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64 {
        v1.map_or(0, |v| self.hash_field_value(v.schema(), &v.cell.data))
    }

    fn partition_local_graph(&self, graph: &mut LocalGraph) {
        if graph.vertices().is_empty() {
            return;
        }
        // Get all vertices that need to be repartitioned
        let mut vertex_updates = AHashMap::new();

        // First pass: identify vertices that need repartitioning
        for (vertex_id, vertex) in graph.vertices() {
            let partition = self.hash_field_value(vertex.cell.header.schema, &vertex.cell.data);
            if vertex_id.higher == 0 {
                vertex_updates.insert(*vertex_id, partition);
            }
        }

        // Update edge partitions and references
        // NOTE(review): edges read `edge.vertex_a.higher` before vertex
        // updates are applied — presumably `update_ids` resolves this; verify.
        let mut edge_updates = AHashMap::new();
        for (edge_id, edge) in graph.edges() {
            if edge_id.higher == 0 {
                edge_updates.insert(*edge_id, edge.vertex_a.higher);
            }
        }

        graph.update_ids(&vertex_updates, &edge_updates);
    }
}

impl FromFieldPartitioner {
    /// Hash (schema id, field id, field value) into a partition key.
    fn hash_field_value(&self, schema_id: u32, value: &OwnedValue) -> u64 {
        let hash_bytes = value[self.field_id].hash();
        let mut hasher = AHasher::default();
        hasher.write_u32(schema_id);
        hasher.write_u64(self.field_id);
        hasher.write(&hash_bytes);
        hasher.finish()
    }
}

/// Partitions by selecting a random partition key
#[derive(Clone, Copy)]
pub struct RandomPartitioner;

impl Partitioner for RandomPartitioner {
    fn partition_key(&self, _v1: Option<&Vertex>, _v2: Option<&Vertex>) -> u64 {
        random_partition_key()
    }

    fn partition_local_graph(&self, graph: &mut LocalGraph) {
        if graph.vertices().is_empty() {
            return;
        }
        let mut vertex_updates = AHashMap::new();
        for (vertex_id, _vertex) in graph.vertices() {
            if vertex_id.higher == 0 {
                vertex_updates.insert(*vertex_id, random_partition_key());
            }
        }
        let mut edge_updates = AHashMap::new();
        for (edge_id, edge) in graph.edges() {
            if edge_id.higher == 0 {
                edge_updates.insert(*edge_id, edge.vertex_a.higher);
            }
        }
        graph.update_ids(&vertex_updates, &edge_updates);
    }
}

// /// Partitions using a user-supplied function
// #[derive(Clone)]
// pub struct CustomPartitioner {
//     func: Box<dyn Fn(Option<&Vertex>, Option<&Vertex>) -> u64 + Send + Sync + 'static + Clone>,
// }
// impl CustomPartitioner {
//     pub fn new<F>(func: F) -> Self
//     where
//         F: Fn(Option<&Vertex>, Option<&Vertex>) -> u64 + 'static,
//     {
//         Self { func: Box::new(func) }
//     }
// }

// impl Partitioner for CustomPartitioner {
//     fn partition_key(&self, v1: Option<&Vertex>, v2: Option<&Vertex>) -> u64 {
//         (self.func)(v1, v2)
//     }
// }

fn random_partition_key() -> u64 {
    Id::rand().higher
}

/// Places everything in partition 0; useful for single-node setups and tests.
#[derive(Clone, Copy)]
pub struct DefaultPartitioner;

impl Partitioner for DefaultPartitioner {
    fn partition_key(&self, _v1: Option<&Vertex>, _v2: Option<&Vertex>) -> u64 {
        0
    }

    fn partition_local_graph(&self, _graph: &mut LocalGraph) {}
}
--------------------------------------------------------------------------------
/src/graph/partitioner/vector.rs:
--------------------------------------------------------------------------------
// A vector partitioner that uses LSH with cached projection vectors
// and then converts the resulting binary hash into Gray code.

use ahash::AHashMap;
use dovahkiin::types::{OwnedPrimArray, OwnedValue};
use neb::ram::cell::OwnedCell;
use once_cell::sync::Lazy;
use rand::prelude::*;
use rand_distr::{Distribution, Normal};
use std::sync::RwLock;

use crate::graph::vertex::Vertex;

use super::{random_partition_key, Partitioner};

const NUM_BITS: usize = 64;

// Global cache storing a native Vec<Vec<f32>> for the current dimension.
19 | // The Option holds (dimension, projection_vectors), where projection_vectors 20 | // is a Vec of projection vectors (each is a Vec), and the number of vectors equals the maximum requested bits so far. 21 | static PROJECTION_CACHE: Lazy>)>>> = 22 | Lazy::new(|| RwLock::new(None)); 23 | 24 | /// Deterministically generate `n_bits` projection vectors of dimension `dim` using a fixed seed. 25 | /// Returns a Vec of projection vectors (each vector is a Vec of length `dim`). 26 | fn generate_deterministic_vectors(dim: usize, n_bits: usize) -> Vec> { 27 | let seed: [u8; 32] = [42; 32]; // Fixed seed for determinism. 28 | let mut rng = rand::rngs::StdRng::from_seed(seed); 29 | let normal = Normal::new(0.0, 1.0).unwrap(); 30 | (0..n_bits) 31 | .map(|_| { 32 | (0..dim) 33 | .map(|_| normal.sample(&mut rng)) 34 | .collect::>() 35 | }) 36 | .collect() 37 | } 38 | 39 | /// Retrieves projection vectors for the given dimension and requested number of bits as a native Vec>. 40 | /// If the global cache is already populated for the same dimension and has at least `requested_bits` vectors, 41 | /// returns the first `requested_bits` vectors (cloned). Otherwise, regenerates and caches the new set. 42 | fn get_projection_vectors(dim: usize, requested_bits: usize) -> Vec> { 43 | { 44 | let cache = PROJECTION_CACHE.read().unwrap(); 45 | if let Some((cached_dim, ref vectors)) = *cache { 46 | if cached_dim == dim && vectors.len() >= requested_bits { 47 | // Return the first `requested_bits` vectors. 48 | return vectors[..requested_bits].to_vec(); 49 | } 50 | } 51 | } 52 | // Acquire a write lock to update the cache. 53 | let mut cache = PROJECTION_CACHE.write().unwrap(); 54 | // Regenerate the projection vectors for this dimension with the requested number of bits. 55 | let new_vectors = generate_deterministic_vectors(dim, requested_bits); 56 | *cache = Some((dim, new_vectors.clone())); 57 | new_vectors 58 | } 59 | 60 | /// Convert a binary number to its Gray code representation. 
61 | fn binary_to_gray(n: u64) -> u64 { 62 | n ^ (n >> 1) 63 | } 64 | 65 | /// Normalize a vector to unit length 66 | fn normalize_vector(vector: &[f32]) -> Vec { 67 | let norm: f32 = vector.iter().map(|x| x * x).sum::().sqrt(); 68 | if norm == 0.0 { 69 | return vector.to_vec(); 70 | } 71 | vector.iter().map(|x| x / norm).collect() 72 | } 73 | 74 | /// A partitioner that computes the partition key based on the vertex's vector 75 | /// using LSH with cached projection vectors (stored natively as Vec>) 76 | /// and then converts the resulting binary hash into Gray code. 77 | #[derive(Debug, Clone)] 78 | pub struct FromVectorPartitioner { 79 | field_id: u64, 80 | } 81 | 82 | impl FromVectorPartitioner { 83 | /// Creates a new partitioner for the given field, dimension, and number of bits. 84 | pub fn new(field_id: u64) -> Self { 85 | Self { field_id } 86 | } 87 | } 88 | 89 | impl Partitioner for FromVectorPartitioner { 90 | /// Computes the partition key from the vertex's vector. 91 | /// It uses the globally cached projection vectors (extending them if needed) 92 | /// to compute a binary hash (by taking dot products) and then converts that hash into Gray code. 93 | fn partition_key(&self, v1: Option<&Vertex>, _v2: Option<&Vertex>) -> u64 { 94 | if let Some(vertex) = v1 { 95 | self.vertex_partition_key(&vertex.cell) 96 | } else { 97 | // Fallback: if no vertex is provided, return a constant. 
98 | 0 99 | } 100 | } 101 | 102 | fn partition_local_graph(&self, graph: &mut crate::graph::local::LocalGraph) { 103 | if graph.vertices().is_empty() { 104 | return; 105 | } 106 | let mut vertex_updates = AHashMap::new(); 107 | for (vertex_id, _vertex) in graph.vertices() { 108 | if vertex_id.higher == 0 { 109 | vertex_updates.insert(*vertex_id, self.vertex_partition_key(&_vertex.cell)); 110 | } 111 | } 112 | let mut edge_updates = AHashMap::new(); 113 | for (edge_id, edge) in graph.edges() { 114 | if edge_id.higher == 0 { 115 | edge_updates.insert(*edge_id, edge.vertex_a.higher); 116 | } 117 | } 118 | graph.update_ids(&vertex_updates, &edge_updates); 119 | } 120 | } 121 | 122 | impl FromVectorPartitioner { 123 | pub fn vertex_partition_key(&self, cell: &OwnedCell) -> u64 { 124 | let field = &cell.data[self.field_id]; 125 | match field { 126 | OwnedValue::PrimArray(prim_array) => array_partition_key(prim_array), 127 | _ => { 128 | return 0; 129 | } 130 | } 131 | } 132 | } 133 | 134 | pub fn array_partition_key(array: &OwnedPrimArray) -> u64 { 135 | let vector = match array { 136 | OwnedPrimArray::F32(vector) => vector.clone(), 137 | OwnedPrimArray::F64(vector) => vector.iter().map(|f| *f as f32).collect(), 138 | OwnedPrimArray::U8(vector) => vector.iter().map(|f| *f as f32).collect(), 139 | OwnedPrimArray::U16(vector) => vector.iter().map(|f| *f as f32).collect(), 140 | OwnedPrimArray::U32(vector) => vector.iter().map(|f| *f as f32).collect(), 141 | OwnedPrimArray::U64(vector) => vector.iter().map(|f| *f as f32).collect(), 142 | OwnedPrimArray::I8(vector) => vector.iter().map(|f| *f as f32).collect(), 143 | OwnedPrimArray::I16(vector) => vector.iter().map(|f| *f as f32).collect(), 144 | OwnedPrimArray::I32(vector) => vector.iter().map(|f| *f as f32).collect(), 145 | OwnedPrimArray::I64(vector) => vector.iter().map(|f| *f as f32).collect(), 146 | _ => { 147 | return 0; 148 | } 149 | }; 150 | let normalized_vector = normalize_vector(&vector); 151 | let projections = 
get_projection_vectors(normalized_vector.len(), NUM_BITS); 152 | let mut hash: u64 = 0; 153 | for (i, proj) in projections.iter().enumerate() { 154 | let dot: f32 = normalized_vector 155 | .iter() 156 | .zip(proj.iter()) 157 | .map(|(a, b)| a * b) 158 | .sum(); 159 | if dot > 0.0 { 160 | hash |= 1 << i; 161 | } 162 | } 163 | binary_to_gray(hash) 164 | } 165 | -------------------------------------------------------------------------------- /src/graph/partitioner/vector_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use bifrost_hasher::hash_str; 3 | use dovahkiin::data_map; 4 | use dovahkiin::types::Map; 5 | use dovahkiin::types::OwnedPrimArray; 6 | use neb::ram::cell::OwnedCell; 7 | use neb::ram::types::OwnedValue; 8 | 9 | const VECTOR_FIELD: &str = "v"; 10 | lazy_static! { 11 | static ref VECTOR_FIELD_ID: u64 = hash_str(VECTOR_FIELD); 12 | } 13 | 14 | fn create_test_vector(values: &[f32]) -> OwnedCell { 15 | let mut cell = OwnedCell::default(); 16 | cell.data = OwnedValue::Map(data_map! 
{
        v: OwnedValue::PrimArray(OwnedPrimArray::F32(values.to_vec()))
    });
    cell
}

/// Cosine similarity of two equal-length vectors (helper for manual checks).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    dot_product / (norm_a * norm_b)
}

/// Absolute difference between two partition numbers.
fn partition_number_distance(a: u64, b: u64) -> u64 {
    if a > b {
        a - b
    } else {
        b - a
    }
}

#[test]
fn test_similar_vectors_get_similar_partitions() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    let vs1 = [1.0, 2.0, 3.0, 4.0];
    let vs2 = [1.01, 2.02, 3.03, 4.04];

    // Create two very similar vectors
    let v1 = create_test_vector(&vs1);
    let v2 = create_test_vector(&vs2);

    let vertex1 = Vertex { cell: v1 };
    let vertex2 = Vertex { cell: v2 };

    let p1 = partitioner.partition_key(Some(&vertex1), None);
    let p2 = partitioner.partition_key(Some(&vertex2), None);

    // Similar vectors should get numerically close partition numbers
    let distance = partition_number_distance(p1, p2);
    assert!(
        distance < 10,
        "Similar vectors got too distant partition numbers: {} vs {}",
        p1,
        p2
    );
}

#[test]
fn test_vector_similarity_vs_partition_distance() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    // Create a base vector
    let base_vector = create_test_vector(&[1.0, 0.0, 0.0, 0.0]);
    let base_vertex = Vertex { cell: base_vector };
    let base_partition = partitioner.partition_key(Some(&base_vertex), None);

    // Test vectors with varying similarity to base; partition distance is
    // expected to grow (weakly) as similarity drops.
    // NOTE(review): LSH only preserves order statistically — this monotonicity
    // assertion is tied to the fixed projection seed and may be fragile.
    let test_vectors = vec![
        create_test_vector(&[0.99, 0.01, 0.0, 0.0]), // Very similar
        create_test_vector(&[0.9, 0.1, 0.0, 0.0]),   // Somewhat similar
        create_test_vector(&[0.7, 0.3, 0.0, 0.0]),   // Less similar
        create_test_vector(&[-0.7, -1.0, 0.9, 1.0]), // Orthogonal
    ];
    let mut last_partition = 0;
    for test_vector in test_vectors {
        let test_vertex = Vertex { cell: test_vector };
        let test_partition = partitioner.partition_key(Some(&test_vertex), None);
        let partition_dist = partition_number_distance(base_partition, test_partition);
        println!("Partition distance: {}", partition_dist);
        assert!(last_partition <= partition_dist);
        last_partition = partition_dist;
    }
}

#[test]
fn test_boundary_conditions() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    // Test zero vector
    let zero_vector = create_test_vector(&[0.0, 0.0, 0.0, 0.0]);
    let zero_vertex = Vertex { cell: zero_vector };
    let zero_partition = partitioner.partition_key(Some(&zero_vertex), None);

    // Ensure zero vector gets a valid partition
    assert!(
        zero_partition <= u64::MAX,
        "Zero vector should get a valid partition"
    );

    // Test unit vectors along different dimensions
    let unit_vectors = vec![
        create_test_vector(&[1.0, 0.0, 0.0, 0.0]),
        create_test_vector(&[0.0, 1.0, 0.0, 0.0]),
        create_test_vector(&[0.0, 0.0, 1.0, 0.0]),
        create_test_vector(&[0.0, 0.0, 0.0, 1.0]),
    ];

    let mut unit_partitions = Vec::new();
    for unit_vector in unit_vectors {
        let unit_vertex = Vertex { cell: unit_vector };
        let unit_partition = partitioner.partition_key(Some(&unit_vertex), None);
        unit_partitions.push(unit_partition);
    }

    // Check that unit vectors get different partitions
    for i in 0..unit_partitions.len() {
        for j in i + 1..unit_partitions.len() {
            assert_ne!(
                unit_partitions[i], unit_partitions[j],
                "Different unit vectors should get different partitions"
            );
        }
    }

    // Test extreme values
    let extreme_vector = create_test_vector(&[f32::MAX, f32::MIN, f32::EPSILON, -0.0]);
    let extreme_vertex = Vertex {
        cell: extreme_vector,
    };
    let extreme_partition = partitioner.partition_key(Some(&extreme_vertex), None);
    assert!(
        extreme_partition <= u64::MAX,
        "Extreme values should get valid partitions"
    );

    // Test NaN handling (should be normalized properly)
    let nan_vector = create_test_vector(&[f32::NAN, 0.0, 0.0, 0.0]);
    let nan_vertex = Vertex { cell: nan_vector };
    let nan_partition = partitioner.partition_key(Some(&nan_vertex), None);
    assert!(
        nan_partition <= u64::MAX,
        "NaN values should be handled gracefully"
    );
}

#[test]
fn test_deterministic_behavior() {
    let partitioner1 = FromVectorPartitioner::new(*VECTOR_FIELD_ID);
    let partitioner2 = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    let test_vector = create_test_vector(&[1.0, 2.0, 3.0, 4.0]);
    let test_vertex = Vertex { cell: test_vector };

    let p1 = partitioner1.partition_key(Some(&test_vertex), None);
    let p2 = partitioner2.partition_key(Some(&test_vertex), None);

    assert_eq!(
        p1, p2,
        "Same vector should get same partition number across partitioner instances"
    );
}

#[test]
fn test_gray_code_properties() {
    let partitioner = FromVectorPartitioner::new(*VECTOR_FIELD_ID);

    // NOTE(review): `partition + 1` is not in general the "next" Gray code, so
    // this is a loose sanity check rather than a true Gray-code property test.
    let test_vector = create_test_vector(&[1.0, 0.0, 0.0, 0.0]);
    let test_vertex = Vertex { cell: test_vector };

    let partition = partitioner.partition_key(Some(&test_vertex), None);
    let next_partition = partition + 1;

    let diff = partition ^ next_partition;
    assert!(
        diff.count_ones() <= 2,
        "Adjacent partition numbers should differ by at most two bits"
    );
}
-------------------------------------------------------------------------------- /src/graph/vertex/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::graph::edge; 2 | use crate::graph::id_list::{IdList, IdListError}; 3 | use crate::server::schema::{GraphSchema, SchemaContainer}; 4 | use dovahkiin::types::{OwnedMap, OwnedValue}; 5 | use neb::client::transaction::{Transaction, TxnError}; 6 | use neb::ram::cell::OwnedCell; 7 | use neb::ram::types::Id; 8 | 9 | use super::EdgeDirection; 10 | use std::ops::{Index, IndexMut}; 11 | use std::sync::Arc; 12 | 13 | #[derive(Debug, Clone, Default)] 14 | pub struct Vertex { 15 | pub cell: OwnedCell, 16 | } 17 | 18 | #[derive(Debug)] 19 | pub enum RemoveError { 20 | NotFound, 21 | FormatError, 22 | IdListError(IdListError), 23 | EdgeError(edge::EdgeError), 24 | } 25 | 26 | pub fn cell_to_vertex<'a>(cell: OwnedCell) -> Vertex { 27 | Vertex { cell } 28 | } 29 | 30 | pub fn vertex_to_cell<'a>(vertex: Vertex) -> OwnedCell { 31 | vertex.cell 32 | } 33 | 34 | impl Vertex { 35 | pub fn new(schema: u32, data: OwnedMap) -> Vertex { 36 | Self::new_with_id(Id::unit_id(), schema, data) 37 | } 38 | pub fn new_with_id(id: Id, schema: u32, data: OwnedMap) -> Vertex { 39 | Vertex { 40 | cell: OwnedCell::new_with_id(schema, &id, OwnedValue::Map(data)), 41 | } 42 | } 43 | pub fn schema(&self) -> u32 { 44 | self.cell.header.schema 45 | } 46 | } 47 | 48 | pub async fn txn_remove( 49 | txn: &Transaction, 50 | schemas: &Arc, 51 | vertex: V, 52 | ) -> Result, TxnError> 53 | where 54 | V: ToVertexId, 55 | { 56 | let id = &vertex.to_id(); 57 | match txn.head(*id).await? { 58 | Some(_head) => { 59 | let remove_field_lists = |id: Id, txn: Transaction, field_id: u64| { 60 | async move { 61 | let (type_list_id, schemas_ids) = 62 | match IdList::cell_types(&txn, id, field_id).await? 
{ 63 | Some(t) => t, 64 | None => { 65 | error!("Failed to get type list, {:?}", id); 66 | return Ok(Err(RemoveError::FormatError)); 67 | } 68 | }; 69 | for schema_id in schemas_ids { 70 | let mut id_list = 71 | IdList::from_txn_and_container(&txn, id, field_id, schema_id); 72 | { 73 | // remove edge cells 74 | let mut iter = match id_list.iter().await? { 75 | Ok(iter) => iter, 76 | Err(e) => return Ok(Err(RemoveError::IdListError(e))), 77 | }; 78 | let edge_attrs = match schemas.schema_type(schema_id) { 79 | Some(GraphSchema::Edge(ea)) => ea, 80 | _ => return Ok(Err(RemoveError::FormatError)), 81 | }; 82 | let edge_schema_id = schema_id; 83 | while let Some(edge_id) = iter.next().await { 84 | let edge = match edge::from_id( 85 | id, 86 | &edge_attrs, 87 | edge_schema_id, 88 | iter.segments.id_iter.txn, 89 | edge_id, 90 | ) 91 | .await? 92 | { 93 | Ok(edge) => edge, 94 | Err(e) => return Ok(Err(RemoveError::EdgeError(e))), 95 | }; 96 | match edge.remove(iter.segments.id_iter.txn).await? { 97 | Ok(()) => {} 98 | Err(e) => return Ok(Err(RemoveError::EdgeError(e))), 99 | } 100 | } 101 | } 102 | match id_list.clear_segments().await? { 103 | // remove segment cells 104 | Ok(()) => {} 105 | Err(e) => return Ok(Err(RemoveError::IdListError(e))), 106 | } 107 | } 108 | txn.remove(type_list_id).await?; // remove field schema list cell 109 | Ok(Ok(())) 110 | } 111 | }; 112 | match remove_field_lists(*id, txn.clone(), EdgeDirection::Undirected.as_field()).await? 113 | { 114 | Ok(()) => {} 115 | Err(e) => return Ok(Err(e)), 116 | } 117 | match remove_field_lists(*id, txn.clone(), EdgeDirection::Inbound.as_field()).await? { 118 | Ok(()) => {} 119 | Err(e) => return Ok(Err(e)), 120 | } 121 | match remove_field_lists(*id, txn.clone(), EdgeDirection::Outbound.as_field()).await? 
{ 122 | Ok(()) => {} 123 | Err(e) => return Ok(Err(e)), 124 | } 125 | txn.remove(*id).await.map(|_| Ok(())) // remove vertex cell 126 | } 127 | None => Ok(Err(RemoveError::NotFound)), 128 | } 129 | } 130 | 131 | pub async fn txn_update(txn: &Transaction, vertex: V, update: U) -> Result<(), TxnError> 132 | where 133 | V: ToVertexId, 134 | U: Fn(Vertex) -> Option, 135 | { 136 | let id = &vertex.to_id(); 137 | let update_cell = |cell| match update(cell_to_vertex(cell)) { 138 | Some(vertex) => Some(vertex_to_cell(vertex)), 139 | None => None, 140 | }; 141 | let cell = txn.read(*id).await?; 142 | match cell { 143 | Some(cell) => match update_cell(cell) { 144 | Some(cell) => txn.update(cell).await, 145 | None => txn.abort().await, 146 | }, 147 | None => txn.abort().await, 148 | } 149 | } 150 | 151 | impl Vertex { 152 | pub fn id(&self) -> Id { 153 | self.cell.id() 154 | } 155 | } 156 | 157 | pub trait ToVertexId { 158 | fn to_id(&self) -> Id; 159 | } 160 | 161 | impl ToVertexId for Vertex { 162 | fn to_id(&self) -> Id { 163 | self.cell.id() 164 | } 165 | } 166 | 167 | impl ToVertexId for Id { 168 | fn to_id(&self) -> Id { 169 | *self 170 | } 171 | } 172 | 173 | impl<'a> ToVertexId for &'a Id { 174 | fn to_id(&self) -> Id { 175 | **self 176 | } 177 | } 178 | 179 | impl<'a> ToVertexId for &'a Vertex { 180 | fn to_id(&self) -> Id { 181 | self.cell.id() 182 | } 183 | } 184 | 185 | impl<'a> Index for Vertex { 186 | type Output = OwnedValue; 187 | fn index(&self, index: u64) -> &Self::Output { 188 | &self.cell.data[index] 189 | } 190 | } 191 | 192 | impl<'a> Index<&'a str> for Vertex { 193 | type Output = OwnedValue; 194 | fn index(&self, index: &'a str) -> &Self::Output { 195 | &self.cell.data[index] 196 | } 197 | } 198 | 199 | impl<'a> IndexMut<&'a str> for Vertex { 200 | fn index_mut(&mut self, index: &'a str) -> &mut Self::Output { 201 | &mut self.cell[index] 202 | } 203 | } 204 | 205 | impl<'a> IndexMut for Vertex { 206 | fn index_mut(&mut self, index: u64) -> &mut 
Self::Output { 207 | &mut self.cell[index] 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/job/logger.rs: -------------------------------------------------------------------------------- 1 | use parking_lot::Mutex; 2 | use std::sync::Arc; 3 | use std::time::{SystemTime, UNIX_EPOCH}; 4 | 5 | use lightning::map::{Map, PtrHashMap}; 6 | use serde::{Deserialize, Serialize}; 7 | 8 | use crate::job::JobId; 9 | 10 | pub type JobLogger = PtrHashMap>>>; 11 | 12 | #[derive(Debug, Clone, Serialize, Deserialize)] 13 | pub struct JobLog { 14 | pub timestamp: u64, 15 | pub level: JobLogLevel, 16 | pub message: String, 17 | } 18 | 19 | #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] 20 | pub enum JobLogLevel { 21 | Error, 22 | Warning, 23 | Info, 24 | Trace, 25 | } 26 | 27 | pub fn append_job_log( 28 | job_logger: &Arc, 29 | job_id: JobId, 30 | level: JobLogLevel, 31 | message: String, 32 | ) { 33 | if cfg!(debug_assertions) && level != JobLogLevel::Trace { 34 | println!("[{:?} - {:?}] {}", job_id, level, message); 35 | return; 36 | } 37 | if level == JobLogLevel::Trace { 38 | return; 39 | } 40 | match job_logger.get(&job_id) { 41 | Some(job_logger) => { 42 | job_logger.lock().push(JobLog { 43 | timestamp: SystemTime::now() 44 | .duration_since(UNIX_EPOCH) 45 | .unwrap() 46 | .as_secs(), 47 | level, 48 | message, 49 | }); 50 | } 51 | None => { 52 | // error!( 53 | // "Failed to get job logger by job id: {:?}, inserting new job logger", 54 | // job_id 55 | // ); 56 | job_logger.try_insert(job_id, Arc::new(Mutex::new(Vec::new()))); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/job/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::traversal::navigation::App; 2 | use dovahkiin::types::Id; 3 | use parking_lot::Mutex; 4 | use serde::{Deserialize, Serialize}; 5 | use std::sync::Arc; 6 | 7 | pub mod 
logger; 8 | pub mod service; 9 | 10 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] 11 | pub struct JobId { 12 | pub coordinator_server_id: u64, 13 | pub coordinator_job_id: u64, 14 | } 15 | 16 | #[derive(Debug, Clone, Serialize, Deserialize)] 17 | pub struct JobReport { 18 | pub status: JobStatus, 19 | pub result: Vec, 20 | } 21 | 22 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] 23 | pub enum JobStatus { 24 | Created, 25 | Initialized, 26 | Running, 27 | Finished, 28 | Failed, 29 | } 30 | 31 | pub struct Job { 32 | pub job_id: JobId, 33 | pub app: App, 34 | pub config: Vec, 35 | pub report: Arc>, 36 | } 37 | 38 | pub enum CollectiveResult { 39 | Found(Vec<(Id, u64)>), 40 | Continue(Vec<(Id, u64, f32)>), 41 | Error(Vec<(Id, u64, String)>), 42 | NotFound, 43 | } 44 | 45 | impl JobReport { 46 | pub fn new() -> Self { 47 | Self { 48 | status: JobStatus::Created, 49 | result: vec![], 50 | } 51 | } 52 | 53 | pub fn update_status(&mut self, status: JobStatus) { 54 | self.status = status; 55 | } 56 | 57 | pub fn update_findings(&mut self, findings: Vec) { 58 | self.result = findings; 59 | } 60 | } 61 | 62 | pub fn initialize_job_run(report: &Arc>) { 63 | report.lock().update_status(JobStatus::Running); 64 | } 65 | 66 | impl JobId { 67 | pub fn new(coordinator_server_id: u64, coordinator_job_id: u64) -> Self { 68 | Self { 69 | coordinator_server_id, 70 | coordinator_job_id, 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/job/service.rs: -------------------------------------------------------------------------------- 1 | use bifrost::{dispatch_rpc_service_functions, service}; 2 | use futures::future::BoxFuture; 3 | use lightning::map::Map; 4 | use parking_lot::Mutex; 5 | 6 | use super::{ 7 | logger::{JobLog, JobLogger}, 8 | JobId, 9 | }; 10 | 11 | service! 
{ 12 | rpc init_node_logger(job_id: JobId) -> Result<(), String>; 13 | rpc get_node_logs(job_id: JobId) -> Option>; 14 | rpc retire_node_logger(job_id: JobId) -> Result<(), String>; 15 | } 16 | 17 | pub struct JobService { 18 | pub job_logger: Arc, 19 | } 20 | 21 | impl Service for JobService { 22 | fn init_node_logger<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, Result<(), String>> { 23 | self.job_logger 24 | .insert_no_rt(job_id, Arc::new(Mutex::new(Vec::new()))); 25 | future::ready(Ok(())).boxed() 26 | } 27 | 28 | fn get_node_logs<'a>( 29 | &'a self, 30 | job_id: JobId, 31 | ) -> ::futures::future::BoxFuture<'a, Option>> { 32 | future::ready( 33 | self.job_logger 34 | .get(&job_id) 35 | .map(|logger| logger.lock().clone()), 36 | ) 37 | .boxed() 38 | } 39 | 40 | fn retire_node_logger<'a>( 41 | &'a self, 42 | job_id: JobId, 43 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 44 | self.job_logger.remove(&job_id); 45 | future::ready(Ok(())).boxed() 46 | } 47 | } 48 | 49 | impl JobService { 50 | pub fn logger(&self) -> &Arc { 51 | &self.job_logger 52 | } 53 | } 54 | 55 | dispatch_rpc_service_functions!(JobService); 56 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(proc_macro)] 2 | #![feature(conservative_impl_trait)] 3 | 4 | extern crate neb; 5 | #[macro_use] 6 | extern crate lazy_static; 7 | extern crate bifrost; 8 | extern crate bifrost_hasher; 9 | #[macro_use] 10 | extern crate bifrost_plugins; 11 | extern crate parking_lot; 12 | extern crate serde; 13 | #[macro_use] 14 | extern crate serde_derive; 15 | #[macro_use] 16 | extern crate log; 17 | extern crate env_logger; 18 | extern crate log4rs; 19 | extern crate serde_yaml; 20 | extern crate yaml_rust; 21 | 22 | mod apps; 23 | mod config; 24 | mod graph; 25 | mod job; 26 | mod query; 27 | mod server; 28 | #[cfg(test)] 29 | mod tests; 30 | mod traversal; 31 | 
mod utils; 32 | use std::thread; 33 | 34 | #[tokio::main(flavor = "multi_thread")] 35 | async fn main() { 36 | log4rs::init_file("config/log4rs.yaml", Default::default()).unwrap(); 37 | info!("Shisoft Morpheus is initializing..."); 38 | query::init().unwrap(); 39 | let config = config::options_from_file("config/server.yaml"); 40 | server::MorpheusServer::new(config).await.unwrap(); 41 | } 42 | -------------------------------------------------------------------------------- /src/query/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::graph::edge::Edge; 2 | use crate::graph::vertex::Vertex; 3 | use dovahkiin::expr; 4 | use dovahkiin::types::OwnedValue; 5 | use neb::dovahkiin::expr::interpreter::Interpreter; 6 | use neb::dovahkiin::expr::symbols::bindings::bind; 7 | use neb::dovahkiin::expr::symbols::utils::is_true; 8 | use neb::dovahkiin::expr::SExpr; 9 | use neb::dovahkiin::integrated::lisp::parse_to_sexpr; 10 | use neb::dovahkiin::types::Value; 11 | 12 | pub static VERTEX_SYMBOL: u64 = hash_ident!(vertex) as u64; 13 | pub static EDGE_SYMBOL: u64 = hash_ident!(edge) as u64; 14 | 15 | #[derive(Debug)] 16 | pub enum InitQueryError { 17 | CannotInitSymbols, 18 | } 19 | 20 | pub mod symbols; 21 | 22 | pub fn init() -> Result<(), InitQueryError> { 23 | symbols::init_symbols().map_err(|_| InitQueryError::CannotInitSymbols)?; 24 | Ok(()) 25 | } 26 | 27 | pub trait Expr { 28 | fn to_sexpr(&self) -> Result, String>; 29 | } 30 | 31 | impl Expr for String { 32 | fn to_sexpr(&self) -> Result, String> { 33 | parse_to_sexpr(&self) 34 | } 35 | } 36 | 37 | impl<'a> Expr for &'a str { 38 | fn to_sexpr(&self) -> Result, String> { 39 | parse_to_sexpr(self) 40 | } 41 | } 42 | 43 | impl<'a> Expr for &'a Vec> { 44 | fn to_sexpr(&self) -> Result, String> { 45 | return Ok(self.to_vec()); 46 | } 47 | } 48 | 49 | pub struct Tester<'a> { 50 | core: Interpreter<'a>, 51 | } 52 | 53 | fn prep_interp<'a>() -> Interpreter<'a> { 54 | 
Interpreter::new() 55 | } 56 | 57 | pub fn parse_optional_expr(expr: &Option) -> Result>, String> 58 | where 59 | E: Expr, 60 | { 61 | match expr { 62 | &Some(ref expr) => { 63 | let expr_owned = expr.clone(); 64 | Ok(Some(expr_owned.to_sexpr()?)) 65 | } 66 | &None => Ok(None), 67 | } 68 | } 69 | 70 | impl<'a> Tester<'a> { 71 | pub async fn eval_with_edge_and_vertex( 72 | sexpr: &Option>>, 73 | vertex: &Vertex, 74 | edge: &Edge, 75 | ) -> Result { 76 | let sexpr = sexpr.clone(); // TODO: Memory management 77 | let sexpr = if let Some(expr) = sexpr { 78 | expr 79 | } else { 80 | return Ok(true); 81 | }; 82 | let mut interp = prep_interp(); 83 | bind( 84 | interp.get_env(), 85 | VERTEX_SYMBOL, 86 | SExpr::Value(expr::Value::Owned(vertex.cell.data.clone())), 87 | ); 88 | bind( 89 | interp.get_env(), 90 | EDGE_SYMBOL, 91 | SExpr::Value(if let &Some(ref e) = edge.get_data().await { 92 | expr::Value::Owned(e.data.clone()) 93 | } else { 94 | expr::Value::Owned(OwnedValue::Null) 95 | }), 96 | ); 97 | Ok(is_true(&interp.eval(sexpr)?)) 98 | } 99 | 100 | pub fn eval_with_vertex(sexpr: &Option>, vertex: &Vertex) -> Result { 101 | let sexpr = sexpr.clone(); // TODO: Memory management 102 | let sexpr = if let Some(expr) = sexpr { 103 | expr 104 | } else { 105 | return Ok(true); 106 | }; 107 | let mut interp = prep_interp(); 108 | bind( 109 | interp.get_env(), 110 | VERTEX_SYMBOL, 111 | SExpr::Value(expr::Value::Owned(vertex.cell.data.clone())), 112 | ); 113 | Ok(is_true(&interp.eval(sexpr)?)) 114 | } 115 | 116 | pub async fn eval_with_edge( 117 | sexpr: &Option>>, 118 | edge: &Edge, 119 | ) -> Result { 120 | let sexpr = sexpr.clone(); // TODO: Memory management 121 | let sexpr = if let Some(expr) = sexpr { 122 | expr 123 | } else { 124 | return Ok(true); 125 | }; 126 | let mut interp = prep_interp(); 127 | bind( 128 | interp.get_env(), 129 | EDGE_SYMBOL, 130 | SExpr::Value(if let &Some(ref e) = edge.get_data().await { 131 | expr::Value::Owned(e.data.clone()) 132 | } else { 133 | 
expr::Value::Owned(OwnedValue::Null) 134 | }), 135 | ); 136 | Ok(is_true(&interp.eval(sexpr)?)) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/query/symbols/crud/cell.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::expr::interpreter::Environment; 2 | use neb::dovahkiin::expr::symbols::Symbol; 3 | use neb::dovahkiin::expr::SExpr; 4 | 5 | // (insert-cell "" (hashmap "" "" (hashmap "" ))) 6 | #[derive(Debug)] 7 | pub struct Insert {} 8 | impl Symbol for Insert { 9 | fn eval<'a>( 10 | &self, 11 | exprs: Vec>, 12 | env: &mut Environment<'a>, 13 | ) -> Result, String> { 14 | unimplemented!(); 15 | } 16 | fn is_macro(&self) -> bool { 17 | true 18 | } 19 | } 20 | 21 | // (select-cell "" ) 22 | // (select-cell "" (hashmap "" )) // until index is done 23 | #[derive(Debug)] 24 | pub struct Select {} 25 | impl Symbol for Select { 26 | fn eval<'a>( 27 | &self, 28 | exprs: Vec>, 29 | env: &mut Environment<'a>, 30 | ) -> Result, String> { 31 | unimplemented!(); 32 | } 33 | fn is_macro(&self) -> bool { 34 | true 35 | } 36 | } 37 | 38 | // (update-cell "" (hashmap ...)) 39 | // (update-cell "" (hashmap ...) 
(hashmap ...)) // until index is done 40 | #[derive(Debug)] 41 | pub struct Update {} 42 | impl Symbol for Update { 43 | fn eval<'a>( 44 | &self, 45 | exprs: Vec>, 46 | env: &mut Environment<'a>, 47 | ) -> Result, String> { 48 | unimplemented!(); 49 | } 50 | fn is_macro(&self) -> bool { 51 | true 52 | } 53 | } 54 | 55 | // (delete-cell "" ) 56 | // (delete-cell "" (hashmap ...)) // until index is done 57 | #[derive(Debug)] 58 | pub struct Delete {} 59 | impl Symbol for Delete { 60 | fn eval<'a>( 61 | &self, 62 | exprs: Vec>, 63 | env: &mut Environment<'a>, 64 | ) -> Result, String> { 65 | unimplemented!(); 66 | } 67 | fn is_macro(&self) -> bool { 68 | true 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/query/symbols/crud/edge.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::expr::interpreter::Environment; 2 | use neb::dovahkiin::expr::symbols::Symbol; 3 | use neb::dovahkiin::expr::SExpr; 4 | 5 | #[derive(Debug)] 6 | pub struct Insert {} 7 | impl Symbol for Insert { 8 | fn eval<'a>( 9 | &self, 10 | exprs: Vec>, 11 | env: &mut Environment<'a>, 12 | ) -> Result, String> { 13 | unimplemented!(); 14 | } 15 | fn is_macro(&self) -> bool { 16 | true 17 | } 18 | } 19 | 20 | #[derive(Debug)] 21 | pub struct Select {} 22 | impl Symbol for Select { 23 | fn eval<'a>( 24 | &self, 25 | exprs: Vec>, 26 | env: &mut Environment<'a>, 27 | ) -> Result, String> { 28 | unimplemented!(); 29 | } 30 | fn is_macro(&self) -> bool { 31 | true 32 | } 33 | } 34 | 35 | #[derive(Debug)] 36 | pub struct Update {} 37 | impl Symbol for Update { 38 | fn eval<'a>( 39 | &self, 40 | exprs: Vec>, 41 | env: &mut Environment<'a>, 42 | ) -> Result, String> { 43 | unimplemented!(); 44 | } 45 | fn is_macro(&self) -> bool { 46 | true 47 | } 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct Delete {} 52 | impl Symbol for Delete { 53 | fn eval<'a>( 54 | &self, 55 | exprs: Vec>, 56 | env: &mut 
Environment<'a>, 57 | ) -> Result, String> { 58 | unimplemented!(); 59 | } 60 | fn is_macro(&self) -> bool { 61 | true 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/query/symbols/crud/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cell; 2 | pub mod edge; 3 | pub mod vertex; 4 | -------------------------------------------------------------------------------- /src/query/symbols/crud/vertex.rs: -------------------------------------------------------------------------------- 1 | use dovahkiin::expr::interpreter::Environment; 2 | use neb::dovahkiin::expr::symbols::Symbol; 3 | use neb::dovahkiin::expr::SExpr; 4 | 5 | #[derive(Debug)] 6 | pub struct Insert {} 7 | impl Symbol for Insert { 8 | fn eval<'a>( 9 | &self, 10 | exprs: Vec>, 11 | env: &mut Environment<'a>, 12 | ) -> Result, String> { 13 | unimplemented!(); 14 | } 15 | fn is_macro(&self) -> bool { 16 | true 17 | } 18 | } 19 | 20 | #[derive(Debug)] 21 | pub struct Select {} 22 | impl Symbol for Select { 23 | fn eval<'a>( 24 | &self, 25 | exprs: Vec>, 26 | env: &mut Environment<'a>, 27 | ) -> Result, String> { 28 | unimplemented!(); 29 | } 30 | fn is_macro(&self) -> bool { 31 | true 32 | } 33 | } 34 | 35 | #[derive(Debug)] 36 | pub struct Update {} 37 | impl Symbol for Update { 38 | fn eval<'a>( 39 | &self, 40 | exprs: Vec>, 41 | env: &mut Environment<'a>, 42 | ) -> Result, String> { 43 | unimplemented!(); 44 | } 45 | fn is_macro(&self) -> bool { 46 | true 47 | } 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct Delete {} 52 | impl Symbol for Delete { 53 | fn eval<'a>( 54 | &self, 55 | exprs: Vec>, 56 | env: &mut Environment<'a>, 57 | ) -> Result, String> { 58 | unimplemented!(); 59 | } 60 | fn is_macro(&self) -> bool { 61 | true 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/query/symbols/mod.rs: 
-------------------------------------------------------------------------------- 1 | use neb::dovahkiin::expr::symbols::ISYMBOL_MAP; 2 | 3 | pub mod crud; 4 | 5 | pub fn init_symbols() -> Result<(), ()> { 6 | ISYMBOL_MAP.insert("insert-cell", crud::cell::Insert {})?; 7 | ISYMBOL_MAP.insert("insert-vertex", crud::vertex::Insert {})?; 8 | 9 | ISYMBOL_MAP.insert("select-cell", crud::cell::Select {})?; 10 | ISYMBOL_MAP.insert("select-vertex", crud::vertex::Select {})?; 11 | ISYMBOL_MAP.insert("select-edge", crud::edge::Select {})?; 12 | 13 | ISYMBOL_MAP.insert("update-cell", crud::cell::Update {})?; 14 | ISYMBOL_MAP.insert("update-vertex", crud::vertex::Update {})?; 15 | ISYMBOL_MAP.insert("update-edge", crud::edge::Update {})?; 16 | 17 | ISYMBOL_MAP.insert("delete-cell", crud::cell::Delete {})?; 18 | ISYMBOL_MAP.insert("delete-vertex", crud::vertex::Delete {})?; 19 | ISYMBOL_MAP.insert("delete-edge", crud::edge::Delete {})?; 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /src/server/general.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/server/mod.rs: -------------------------------------------------------------------------------- 1 | use bifrost::raft::state_machine::master::ExecError; 2 | use bifrost::rpc; 3 | use bifrost::tcp::STANDALONE_ADDRESS_STRING; 4 | use futures::prelude::*; 5 | use futures::{future, Future}; 6 | use neb::client::{AsyncClient as NebClient, NebClientError}; 7 | use neb::server::{NebServer, ServerError, ServerOptions as NebServerOptions}; 8 | use std::sync::Arc; 9 | 10 | use crate::apps::hnsw::coordinator::HNSWIndexService; 11 | use crate::apps::hnsw::partition::service::HNSW_PARTITION_SERVICE_ID; 12 | use crate::apps::hnsw::{HNSWPartitionService, VectorIndexer}; 13 | use crate::graph::GraphEngine; 14 | use crate::job::logger::JobLogger; 15 | 16 | pub mod 
general; 17 | pub mod schema; 18 | pub mod traversal; 19 | 20 | #[derive(Debug)] 21 | pub enum MorpheusServerError { 22 | ServerError(ServerError), 23 | ClientError(NebClientError), 24 | InitSchemaError(ExecError), 25 | } 26 | 27 | pub struct MorpheusServer { 28 | pub neb_server: Arc, 29 | pub neb_client: Arc, 30 | pub schema_container: Arc, 31 | pub graph: Arc, 32 | pub job_logger: Arc, 33 | } 34 | 35 | #[derive(Debug, Serialize, Deserialize)] 36 | pub struct MorphesOptions { 37 | pub server_addr: String, 38 | pub group_name: String, 39 | pub storage: NebServerOptions, 40 | pub meta_members: Vec, 41 | } 42 | 43 | impl MorpheusServer { 44 | pub async fn new(options: MorphesOptions) -> Result, MorpheusServerError> { 45 | let neb_opts = &options.storage; 46 | let group_name = &options.group_name; 47 | let neb_server = NebServer::new_from_opts(neb_opts, &options.server_addr, group_name).await; 48 | let neb_client = Arc::new( 49 | neb::client::AsyncClient::new( 50 | &neb_server.rpc, 51 | &neb_server.membership, 52 | &options.meta_members, 53 | group_name, 54 | ) 55 | .await 56 | .unwrap(), 57 | ); 58 | debug!("Initializing schemas"); 59 | schema::SchemaContainer::new_meta_service(group_name, &neb_server.raft_service).await; 60 | let schema_container = schema::SchemaContainer::new_client( 61 | group_name, 62 | &neb_client.raft_client, 63 | &neb_client, 64 | &neb_server.meta, 65 | ) 66 | .await 67 | .map_err(MorpheusServerError::InitSchemaError)?; 68 | debug!("Schema container initialized"); 69 | let graph = Arc::new( 70 | GraphEngine::new(&schema_container, &neb_client, &neb_server) 71 | .map_err(MorpheusServerError::InitSchemaError) 72 | .await?, 73 | ); 74 | let job_logger = Arc::new(JobLogger::with_capacity(64)); 75 | Ok(Arc::new(MorpheusServer { 76 | neb_server, 77 | neb_client, 78 | schema_container, 79 | graph, 80 | job_logger, 81 | })) 82 | } 83 | 84 | pub async fn init_hnsw_index_partition_service( 85 | &self, 86 | ) -> Result, String> { 87 | let service = 
HNSWPartitionService::new( 88 | self.neb_server.server_id, 89 | &self.neb_server.consh, 90 | &self.neb_server.raft_client, 91 | &self.neb_server.chunks, 92 | &self.graph, 93 | &self.job_logger, 94 | ) 95 | .await?; 96 | let service_ref = Arc::new(service); 97 | self.neb_server.rpc.register_service(&service_ref).await; 98 | Ok(service_ref) 99 | } 100 | 101 | pub async fn init_hnsw_index_service(&self) -> Result, String> { 102 | let service = HNSWIndexService::new( 103 | &self.graph, 104 | &self.neb_server.consh, 105 | &self.neb_server.raft_client, 106 | ) 107 | .await?; 108 | let service_ref = Arc::new(service); 109 | self.neb_server.rpc.register_service(&service_ref).await; 110 | VectorIndexer::new_and_set_core(&self.neb_server).await; 111 | Ok(service_ref) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/server/schema/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::graph::edge; 2 | use crate::graph::edge::{EdgeAttributes, EdgeType}; 3 | use crate::graph::fields::VERTEX_TEMPLATE; 4 | use crate::server::schema::sm::client::SMClient; 5 | use bifrost::raft::client::RaftClient; 6 | use bifrost::raft::state_machine::master::ExecError; 7 | use bifrost::raft::RaftService; 8 | use bifrost_hasher::hash_str; 9 | use dovahkiin::types::Type; 10 | use futures::{future, Future, FutureExt, TryFutureExt}; 11 | use lightning::map::{Map, PtrHashMap as LFHashMap}; 12 | use neb::client::AsyncClient as NebClient; 13 | use neb::ram::schema::{DelSchemaError, Field, NewSchemaError, Schema}; 14 | use neb::server::ServerMeta as NebServerMeta; 15 | use std::sync::Arc; 16 | 17 | mod sm; 18 | 19 | #[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)] 20 | pub enum GraphSchema { 21 | Unspecified, 22 | Vertex, 23 | Edge(EdgeAttributes), 24 | } 25 | 26 | #[derive(Serialize, Deserialize, Debug, Clone)] 27 | pub enum SchemaError { 28 | NebSchemaExecError(ExecError), 29 | 
MorpheusSchemaExecError(ExecError),
    NewNebSchemaVerificationError(NewSchemaError),
    DelNebSchemaVerificationError(DelSchemaError),
    SimpleEdgeShouldNotHaveSchema,
    SchemaTypeUnspecified,
}

/// Cluster-wide registry mapping Morpheus schema ids to their graph kind,
/// kept in sync across nodes through a raft state machine.
// NOTE(review): generic parameters in this section were stripped by the
// packing tool; they are reconstructed from usage — TODO confirm.
pub struct SchemaContainer {
    pub neb_client: Arc<NebClient>,
    // schema id -> graph kind, updated via the on_schema_added/deleted subscriptions below.
    map: Arc<LFHashMap<u32, GraphSchema>>,
    sm_client: Arc<SMClient>,
    // NOTE(review): field name looks like a typo for `neb_meta`; renaming would
    // touch call sites elsewhere, so it is kept as-is.
    neb_mata: Arc<NebServerMeta>,
}

/// Graph-level schema description layered on top of a neb cell schema.
#[derive(Clone, Debug)]
pub struct MorpheusSchema {
    pub id: u32,
    pub name: String,
    pub schema_type: GraphSchema,
    // assumes string key paths mirroring neb's `str_key_field` — TODO confirm
    pub key_field: Option<Vec<String>>,
    pub fields: Vec<Field>,
    pub is_dynamic: bool,
}

lazy_static! {
    pub static ref EMPTY_FIELDS: Vec<Field> = Vec::new();
}

impl MorpheusSchema {
    /// Primary constructor; every other `new_*` helper delegates here.
    pub fn new_with_id_and_type(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
        schema_type: GraphSchema,
    ) -> MorpheusSchema {
        MorpheusSchema {
            id,
            name: name.to_string(),
            key_field: key_field.cloned(),
            fields: fields.clone(),
            schema_type,
            is_dynamic,
        }
    }

    /// Schema with a known id and no graph kind attached yet.
    pub fn new_with_id(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id_and_type(
            id,
            name,
            key_field,
            fields,
            is_dynamic,
            GraphSchema::Unspecified,
        )
    }

    /// Vertex schema with a known id.
    pub fn new_vertex_with_id(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id_and_type(
            id,
            name,
            key_field,
            fields,
            is_dynamic,
            GraphSchema::Vertex,
        )
    }

    /// Edge schema with a known id and explicit edge attributes.
    pub fn new_edge_with_id(
        id: u32,
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        edge_attr: EdgeAttributes,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id_and_type(
            id,
            name,
            key_field,
            fields,
            is_dynamic,
            GraphSchema::Edge(edge_attr),
        )
    }

    /// Schema with id 0: the server allocates the real id on creation.
    pub fn new(
        name: &str,
        key_field: Option<&Vec<String>>,
        fields: &Vec<Field>,
        is_dynamic: bool,
    ) -> MorpheusSchema {
        Self::new_with_id(0, name, key_field, fields, is_dynamic)
    }

    pub fn into_ref(self) -> Arc<MorpheusSchema> {
        Arc::new(self)
    }
}

/// Prepend the fixed template fields for the schema kind onto `body_fields`.
/// Simple (bodyless) edges must not carry user-defined fields.
pub fn cell_fields(
    schema_type: GraphSchema,
    mut body_fields: Vec<Field>,
) -> Result<Vec<Field>, SchemaError> {
    let mut all_fields = match schema_type {
        GraphSchema::Vertex => VERTEX_TEMPLATE.clone(),
        GraphSchema::Edge(edge_attr) => {
            if !edge_attr.has_body && !body_fields.is_empty() {
                return Err(SchemaError::SimpleEdgeShouldNotHaveSchema);
            }
            match edge_attr.edge_type {
                EdgeType::Directed => edge::directed::EDGE_TEMPLATE.clone(),
                EdgeType::Undirected => edge::undirectd::EDGE_TEMPLATE.clone(),
            }
        }
        GraphSchema::Unspecified => return Err(SchemaError::SchemaTypeUnspecified),
    };
    all_fields.append(&mut body_fields);
    Ok(all_fields)
}

/// Stable raft state-machine id for one server group's schema SM.
pub fn generate_sm_id(group: &str) -> u64 {
    hash_str(&format!("{}-{}", sm::DEFAULT_RAFT_PREFIX, group))
}

impl SchemaContainer {
    /// Register the schema state machine on the local raft service.
    pub async fn new_meta_service(group: &str, raft_service: &Arc<RaftService>) {
        let container_sm = sm::GraphSchemasSM::new(generate_sm_id(group), raft_service).await;
        raft_service
            .register_state_machine(Box::new(container_sm))
            .await;
    }

    /// Build a client-side container, seed its map from the state machine and
    /// subscribe to add/delete events so the local view stays in sync.
    pub async fn new_client(
        group: &str,
        raft_client: &Arc<RaftClient>,
        neb_client: &Arc<NebClient>,
        neb_meta: &Arc<NebServerMeta>,
    ) -> Result<Arc<SchemaContainer>, ExecError> {
        let sm_client = Arc::new(SMClient::new(generate_sm_id(group), &raft_client));
        let existing = sm_client.get_all().await?;
        let container_ref = Arc::new(SchemaContainer {
            map: Arc::new(LFHashMap::with_capacity(64)),
            sm_client: sm_client.clone(),
            neb_client: neb_client.clone(),
            neb_mata: neb_meta.clone(),
        });
        for (schema_id, schema_type) in existing {
            container_ref.map.insert(schema_id, schema_type);
        }
        let added_ref = container_ref.clone();
        let _r1 = sm_client
            .on_schema_added(move |(id, schema_type)| {
                added_ref.map.insert(id, schema_type);
                future::ready(()).boxed()
            })
            .await?
            .unwrap();
        let removed_ref = container_ref.clone();
        let _r2 = sm_client
            .on_schema_deleted(move |id| {
                removed_ref.map.remove(&id);
                future::ready(()).boxed()
            })
            .await?
            .unwrap();
        Ok(container_ref)
    }

    /// Create the backing neb schema, then record its graph kind in the raft
    /// state machine. Note that if the edge does not have a body, the schema
    /// will not be used to generate the edge cell.
    pub async fn new_schema(&self, schema: MorpheusSchema) -> Result<u32, SchemaError> {
        let schema_type = schema.schema_type;
        let schema_fields = cell_fields(schema_type, schema.fields.clone())?;
        let neb_schema = Schema::new_with_id(
            schema.id,
            &schema.name,
            schema.key_field.clone(),
            Field::new_schema(schema_fields),
            schema.is_dynamic,
            false,
        );
        let schema_id = if schema.id == 0 {
            // Let neb allocate a fresh id.
            self.neb_client
                .new_schema(neb_schema)
                .await
                .map_err(SchemaError::NebSchemaExecError)?
                .map_err(SchemaError::NewNebSchemaVerificationError)?
        } else {
            self.neb_client
                .new_schema_with_id(neb_schema)
                .await
                .map_err(SchemaError::NebSchemaExecError)?
                .map_err(SchemaError::NewNebSchemaVerificationError)?;
            schema.id
        };
        self.sm_client
            .new_schema(&schema_id, &schema_type)
            .await
            .map(|_| schema_id)
            .map_err(SchemaError::MorpheusSchemaExecError)
    }

    pub async fn del_schema(&self, schema_name: &String) -> Result<(), SchemaError> {
        self.neb_client
            .del_schema(schema_name.clone())
            .await
            .map_err(SchemaError::NebSchemaExecError)?
            .map_err(SchemaError::DelNebSchemaVerificationError)
    }

    pub fn schema_type(&self, schema_id: u32) -> Option<GraphSchema> {
        Self::schema_type_(&self.map, schema_id)
    }

    fn schema_type_(
        map: &Arc<LFHashMap<u32, GraphSchema>>,
        schema_id: u32,
    ) -> Option<GraphSchema> {
        map.get(&schema_id)
    }

    pub fn id_from_name(&self, name: &str) -> Option<u32> {
        self.neb_mata.schemas.name_to_id(name)
    }

    /// Look up a schema by name; an unknown name falls back to id 0, which is
    /// expected to resolve to nothing.
    pub fn from_name(&self, name: &str) -> Option<MorpheusSchema> {
        let schema_id = self.id_from_name(name).unwrap_or(0);
        self.get_neb_schema(schema_id)
            .and_then(|neb_schema| self.neb_to_morpheus_schema(&neb_schema))
    }

    pub fn get_neb_schema(&self, schema_id: u32) -> Option<Arc<Schema>> {
        self.neb_mata.schemas.get(&schema_id)
    }

    pub fn neb_to_morpheus_schema(&self, schema: &Arc<Schema>) -> Option<MorpheusSchema> {
        Self::neb_to_morpheus_schema_(&self.map, schema)
    }

    /// Convert a neb schema to a Morpheus schema; yields None when the graph
    /// kind is unknown or the neb schema has no sub-fields.
    fn neb_to_morpheus_schema_(
        schema_map: &Arc<LFHashMap<u32, GraphSchema>>,
        schema: &Arc<Schema>,
    ) -> Option<MorpheusSchema> {
        let schema_type = Self::schema_type_(schema_map, schema.id)?;
        let fields = schema.fields.sub_fields.as_ref()?;
        Some(MorpheusSchema {
            id: schema.id,
            name: schema.name.clone(),
            schema_type,
            key_field: schema.str_key_field.clone(),
            fields: fields.clone(),
            is_dynamic: schema.is_dynamic,
        })
    }

    /// All schemas known to neb that also carry a Morpheus graph kind.
    pub async fn all_morpheus_schemas(&self) -> Result<Vec<MorpheusSchema>, ExecError> {
        let schema_map = self.map.clone();
        self.neb_client
            .get_all_schema()
            .await
            .map(move |neb_schemas| {
                neb_schemas
                    .into_iter()
                    .filter_map(|schema| {
                        Self::neb_to_morpheus_schema_(&schema_map, &Arc::new(schema))
                    })
                    .collect()
            })
    }

    pub async fn count(&self) -> Result<usize, ExecError> {
        self.all_morpheus_schemas().await.map(|all| all.len())
    }
}

/// Anything resolvable to a numeric schema id.
pub trait ToSchemaId {
    fn to_id(&self, schemas: &Arc<SchemaContainer>) -> u32;
}

impl ToSchemaId for MorpheusSchema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl ToSchemaId for u32 {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        *self
    }
}

impl ToSchemaId for Schema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

// NOTE(review): the two Arc impls below had their inner types stripped by the
// packing tool; Arc<Schema> / Arc<MorpheusSchema> reconstructed — TODO confirm order.
impl ToSchemaId for Arc<Schema> {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl ToSchemaId for Arc<MorpheusSchema> {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl<'a> ToSchemaId for &'a MorpheusSchema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl<'a> ToSchemaId for &'a Schema {
    fn to_id(&self, _: &Arc<SchemaContainer>) -> u32 {
        self.id
    }
}

impl<'a> ToSchemaId for &'a str {
    fn to_id(&self, schemas: &Arc<SchemaContainer>) -> u32 {
        schemas.id_from_name(self).unwrap_or(0)
    }
}

// ==== file: src/server/schema/sm.rs ====
use bifrost::raft::state_machine::callback::server::{NotifyError, SMCallback};
use bifrost::raft::state_machine::StateMachineCtl;
use bifrost::raft::RaftService;
use bifrost::*;
use bifrost_hasher::hash_str;
use std::collections::HashMap;
use
std::sync::Arc;

use super::GraphSchema;

pub static DEFAULT_RAFT_PREFIX: &'static str = "MORPHEUS_SCHEMA_RAFT_SM";

/// Raft state machine replicating the schema-id -> graph-kind map across the
/// cluster; queried/updated through the generated `StateMachineCmds` API.
// NOTE(review): generic parameters stripped by the packing tool are
// reconstructed from usage — TODO confirm against the original repository.
pub struct GraphSchemasSM {
    map: HashMap<u32, GraphSchema>,
    callback: SMCallback,
    sm_id: u64,
}

raft_state_machine! {
    def qry get_all() -> Vec<(u32, GraphSchema)>;
    def qry get(id: u32) -> Option<GraphSchema>;
    def cmd new_schema(id: u32, schema: GraphSchema) -> Result<(), NotifyError>;
    def cmd del_schema(id: u32) -> Result<(), NotifyError>;
    def sub on_schema_added() -> (u32, GraphSchema);
    def sub on_schema_deleted() -> u32;
}

impl StateMachineCmds for GraphSchemasSM {
    fn get_all<'a>(&'a self) -> BoxFuture<'a, Vec<(u32, GraphSchema)>> {
        future::ready(self.get_all_local()).boxed()
    }

    fn get<'a>(&'a self, id: u32) -> BoxFuture<'a, Option<GraphSchema>> {
        future::ready(self.get_local(id)).boxed()
    }

    /// Insert (or overwrite) the entry, then notify subscribers.
    fn new_schema<'a>(
        &'a mut self,
        id: u32,
        schema: GraphSchema,
    ) -> BoxFuture<'a, Result<(), NotifyError>> {
        self.map.insert(id, schema);
        async move {
            self.callback
                .notify(commands::on_schema_added::new(), (id, schema))
                .await?;
            Ok(())
        }
        .boxed()
    }

    /// Remove the entry and notify subscribers.
    ///
    /// FIX: this previously did `self.map.remove(&id).unwrap()`, which panicked
    /// — and took the raft state machine down — whenever the id was never
    /// registered or was already deleted (e.g. a replayed/duplicated command).
    /// A missing id is now a no-op, and the deletion event is only published
    /// when an entry was actually removed.
    fn del_schema<'a>(&'a mut self, id: u32) -> BoxFuture<'a, Result<(), NotifyError>> {
        let removed = self.map.remove(&id).is_some();
        async move {
            if removed {
                self.callback
                    .notify(commands::on_schema_deleted::new(), id)
                    .await?;
            }
            Ok(())
        }
        .boxed()
    }
}

impl StateMachineCtl for GraphSchemasSM {
    raft_sm_complete!();
    fn id(&self) -> u64 {
        self.sm_id
    }
    /// Serialize the whole map as a snapshot for raft log compaction.
    fn snapshot(&self) -> Option<Vec<u8>> {
        Some(utils::serde::serialize(
            &self.map.iter().collect::<Vec<_>>(),
        ))
    }
    /// Rebuild the map from a snapshot produced by `snapshot`.
    fn recover(&mut self, data: Vec<u8>) -> BoxFuture<()> {
        let schemas: Vec<(u32, GraphSchema)> = utils::serde::deserialize(&data).unwrap();
        for (k, v) in schemas {
            self.map.insert(k, v);
        }
        future::ready(()).boxed()
    }
}

impl GraphSchemasSM {
    pub async fn new<'a>(sm_id: u64,
raft_service: &Arc<RaftService>) -> Self {
        Self {
            callback: SMCallback::new(sm_id, raft_service.clone()).await,
            map: HashMap::with_capacity(64),
            sm_id,
        }
    }

    /// Snapshot of every (id, kind) pair currently in the map.
    fn get_all_local(&self) -> Vec<(u32, GraphSchema)> {
        self.map.iter().map(|(k, v)| (*k, v.clone())).collect()
    }

    fn get_local(&self, id: u32) -> Option<GraphSchema> {
        self.map.get(&id).map(|s| s.clone())
    }
}

// ==== file: src/server/traversal.rs (empty placeholder) ====

// ==== file: src/tests/mod.rs ====
use crate::config;
use crate::server::{MorpheusServer, MorpheusServerError};
use futures::Future;
use std::sync::Arc;

mod graph;

/// Boot a single-node Morpheus server on 127.0.0.1:<port>, with itself as the
/// only meta member; `group` names the cluster group.
// NOTE(review): return generics were stripped by the packing tool and are
// reconstructed here — TODO confirm.
pub fn start_server<'a>(
    port: u32,
    group: &'a str,
) -> impl Future<Output = Result<Arc<MorpheusServer>, MorpheusServerError>> {
    let addr: String = format!("127.0.0.1:{}", port);
    let mut config = config::options_from_file("config/test_server.yaml");
    config.meta_members = vec![addr.clone()];
    config.server_addr = addr;
    config.group_name = format!("{}", group);
    MorpheusServer::new(config)
}

/// Smoke test: a fresh single-node server should come up cleanly.
#[tokio::test]
pub async fn server_startup() {
    let _ = env_logger::try_init();
    start_server(4000, "bootstrap").await.unwrap();
}

// ==== file: src/traversal/bfs/coordinator.rs ====
use super::*;

/// Hands out task ids for BFS tasks coordinated by this server.
pub struct BFSCoordinator {
    pub server_id: u64,
    pub task_id_counter: AtomicU64,
}

impl BFSCoordinator {
    pub fn new(server_id: u64) -> Self {
        Self {
server_id,
            task_id_counter: AtomicU64::new(0),
        }
    }

    /// Allocate the next task id, unique per coordinating server.
    pub fn next_task_id(&self) -> TaskId {
        let task_id = self.task_id_counter.fetch_add(1, Ordering::SeqCst);
        TaskId {
            coordinator_server_id: self.server_id,
            coordinator_task_id: task_id,
        }
    }
}

// ==== file: src/traversal/bfs/engine.rs ====
use super::*;

/// Cluster-unique BFS task identifier.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct TaskId {
    pub coordinator_server_id: u64,
    pub coordinator_task_id: u64,
}

/// State shared between all node tasks on this server.
pub struct SharedEngine {
    pub graph: Arc<GraphEngine>,
    pub runtime: Runtime,
}

// NOTE(review): generic parameters in this section were stripped by the
// packing tool; reconstructed from usage — TODO confirm.
pub struct BFSEngine {
    pub server_id: u64,
    pub coordinator: BFSCoordinator,
    pub node_tasks: PtrHashMap<TaskId, Arc<BFSNodeTask>>,
    pub shared_engine: Arc<SharedEngine>,
}

impl BFSEngine {
    /// Build the engine with its own multi-threaded runtime whose workers are
    /// named "traversal-worker-N".
    pub fn new(server_id: u64, graph: &Arc<GraphEngine>) -> Self {
        let thread_counter: AtomicUsize = AtomicUsize::new(0);
        Self {
            server_id,
            coordinator: BFSCoordinator {
                server_id,
                task_id_counter: AtomicU64::new(0),
            },
            node_tasks: PtrHashMap::with_capacity(DS_CAPACITY),
            shared_engine: Arc::new(SharedEngine {
                graph: graph.clone(),
                runtime: runtime::Builder::new_multi_thread()
                    .enable_all()
                    .thread_name_fn(move || {
                        let counter = thread_counter.fetch_add(1, Ordering::SeqCst);
                        format!("traversal-worker-{}", counter)
                    })
                    .worker_threads(num_cpus::get())
                    .build()
                    .unwrap(),
            }),
        }
    }

    /// Create a node task seeded with `frontiers` and register it under a
    /// fresh task id.
    // NOTE(review): the generic bound was stripped by the packing tool;
    // `T: BFSTask` reconstructed — TODO confirm.
    pub fn create_task<T: BFSTask>(
        &self,
        task: &'static T,
        params_ptr: usize,
        frontiers: Vec<Id>,
    ) -> TaskId {
        let task_id = self.coordinator.next_task_id();
        let node = Arc::new(BFSNodeTask::new(
            task_id.coordinator_task_id,
            task,
            params_ptr,
            &self.shared_engine,
        ));
        for fid in frontiers {
            node.frontier.push_back(fid);
        }
        self.node_tasks.insert(task_id.clone(), node);
        task_id
    }

    pub fn get_graph_engine(&self) -> &Arc<GraphEngine> {
        &self.shared_engine.graph
    }

    /// One BSP superstep: drain the frontier, process each vertex and its
    /// edges on the shared runtime, then wait for every spawned unit.
    pub async fn step_node_task(&self, node: &Arc<BFSNodeTask>) {
        let mut pending = vec![];
        while let Some(id) = node.frontier.pop_front() {
            let vertex = self.shared_engine.graph.vertex_by(id).await;
            let (done_tx, done_rx) = oneshot::channel();
            let task = node.task;
            let node = node.clone();
            self.shared_engine.runtime.spawn(async move {
                if let Ok(Some(vertex)) = vertex {
                    let vertex_edges = task.process_vertex(&vertex, &*node).await;
                    // Now the step edges should be updated.
                    for (_opposite_vertex_id, edge) in vertex_edges {
                        task.process_edge(&vertex, &edge, &*node).await;
                    }
                }
                let _ = done_tx.send(());
            });
            pending.push(done_rx);
        }
        // BSP barrier: wait for all the vertices and their edges to be processed.
        for done in pending {
            let _ = done.await;
        }
    }
}

impl BFSNodeTask {
    fn new(
        task_id: u64,
        task: &'static dyn BFSTask,
        params_ptr: usize,
        shared_engine: &Arc<SharedEngine>,
    ) -> Self {
        Self {
            task_id,
            visited: PtrHashMap::with_capacity(DS_CAPACITY),
            cells_cache: PtrHashMap::with_capacity(DS_CAPACITY),
            cells_metas: PtrHashMap::with_capacity(DS_CAPACITY),
            next_hops: PtrHashMap::with_capacity(DS_CAPACITY),
            task,
            params_ptr,
            frontier: LinkedRingBufferList::new(),
            shared_engine: shared_engine.clone(),
        }
    }

    pub fn mark_visited(&mut self, id: Id) {
        self.visited.insert(id, ());
    }

    pub fn is_visited(&self, id: &Id) -> bool {
        self.visited.contains_key(id)
    }

    pub fn cache_cell(&mut self, cell: OwnedCell) {
        self.cells_cache.insert(cell.id(), cell);
    }

    // NOTE(review): the return generics were stripped by the packing tool;
    // reconstructed from `cells_cache.get_ref` — TODO confirm PtrRef generics.
    pub fn get_cached_cell(&self, id: &Id) -> Option<PtrRef<Id, OwnedCell>> {
        self.cells_cache.get_ref(id)
    }

    pub fn graph(&self) -> &Arc<GraphEngine> {
        &self.shared_engine.graph
    }
}

impl Drop for BFSNodeTask {
    fn drop(&mut self) {
        // Give the task implementation a chance to release per-node resources.
        self.task.dispose(self);
    }
}

// ==== file: src/traversal/bfs/mod.rs ====
// Still working in progress

use std::{
    future::Future,
    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
};

use dovahkiin::types::{Id, OwnedValue};
use futures::future::BoxFuture;
use lightning::{
    aarc::Arc,
    list::LinkedRingBufferList,
    map::{Map, PtrHashMap, PtrRef},
};
use neb::ram::cell::{Cell, OwnedCell};
use tokio::{
    runtime::{self, Runtime},
    sync::oneshot,
};

use crate::graph::{edge::Edge, vertex::Vertex, GraphEngine};
use rayon::*;

mod coordinator;
use coordinator::*;

mod engine;
use engine::*;

mod task;
use task::*;

// NOTE(review): these blanket Send/Sync assertions are author-asserted; the
// soundness of sharing BFSNodeTask across threads is not proven here — verify.
unsafe impl Send for BFSNodeTask {}
unsafe impl Sync for BFSNodeTask {}

const DS_CAPACITY: usize = 1024;

// ==== file: src/traversal/bfs/task.rs ====
use super::*;

/// Per-task BFS state held on one node.
// NOTE(review): map/list generics reconstructed from usage — TODO confirm.
pub struct BFSNodeTask {
    pub task_id: u64,
    pub visited: PtrHashMap<Id, ()>,
    pub cells_cache: PtrHashMap<Id, OwnedCell>,
    pub cells_metas: PtrHashMap<Id, OwnedValue>,
    pub next_hops: PtrHashMap<Id, Id>,
    pub task: &'static dyn BFSTask,
    pub params_ptr: usize,

    pub frontier: LinkedRingBufferList<Id>,

    pub shared_engine: Arc<SharedEngine>,
}

/// Application hook points invoked by the BFS engine.
pub trait BFSTask: Send + Sync {
    /// Returns the (opposite vertex id, edge) pairs discovered from `vertex`.
    fn process_vertex(&self, vertex: &Vertex, node: &BFSNodeTask)
        -> BoxFuture<'_, Vec<(Id, Edge)>>;
    fn process_edge(&self,
vertex: &Vertex, edge: &Edge, node: &BFSNodeTask) -> BoxFuture<'_, ()>;
    /// Called on drop of the node task to release task-owned resources.
    fn dispose(&self, node: &BFSNodeTask);
}

// ==== file: src/traversal/mod.rs ====
pub mod bfs;
pub mod navigation;

// ==== file: src/traversal/navigation/apps.rs ====
use super::*;

/// Applications that can drive a navigation traversal.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub enum App {
    Hnsw,
}

impl App {
    /// Resolve the app to its navigation task singleton.
    pub fn to_task(&self) -> &'static dyn NavigationTask {
        // HNSW task wiring is not hooked up yet.
        unimplemented!()
        // match self {
        //     App::Hnsw => &HnswTraversalTask,
        // }
    }
}

// ==== file: src/traversal/navigation/engine.rs ====
use std::{
    collections::{HashMap, HashSet},
    mem,
};

use async_std::sync::Mutex;
use bifrost::conshash::ConsistentHashing;

use crate::job::*;

use super::{
    server::{NodeFrontier, NodeStep},
    *,
};

/// Per-server navigation engine: hosts the coordinator plus the workers of
/// every job currently active on this node.
// NOTE(review): generic parameters in this section were stripped by the
// packing tool; reconstructed from usage — TODO confirm.
pub struct NavigationEngine {
    pub server_id: u64,
    pub coordinator: NavigationCoordinator,
    pub active_tasks: PtrHashMap<JobId, Arc<Mutex<NavigationWorker>>>,
    pub shared_engine: Arc<SharedEngine>,
}

// NOTE(review): author-asserted thread-safety; not proven here — verify.
unsafe impl Send for NavigationEngine {}
unsafe impl Sync for NavigationEngine {}

pub struct SharedEngine {
    pub graph: Arc<GraphEngine>,
    pub runtime: Runtime,
}

unsafe impl Send for SharedEngine {}
unsafe impl Sync for SharedEngine {}

impl NavigationEngine {
    /// Build the engine with its own multi-threaded runtime whose workers are
    /// named "navigation-worker-N".
    pub fn new(
        server_id: u64,
        graph: &Arc<GraphEngine>,
        conshash: &Arc<ConsistentHashing>,
    ) -> Self {
        let thread_counter: AtomicUsize = AtomicUsize::new(0);
        let runtime = runtime::Builder::new_multi_thread()
            .enable_all()
            .thread_name_fn(move || {
                let counter = thread_counter.fetch_add(1, Ordering::SeqCst);
                format!("navigation-worker-{}", counter)
            })
            .worker_threads(num_cpus::get())
            .build()
            .unwrap();
        let shared_engine = Arc::new(SharedEngine {
            graph: graph.clone(),
            runtime,
        });
        Self {
            server_id,
            coordinator: NavigationCoordinator::new(server_id, conshash, &shared_engine),
            active_tasks: PtrHashMap::with_capacity(DS_CAPACITY),
            shared_engine: shared_engine.clone(),
        }
    }

    // pub fn create_navigation(
    //     &self,
    //     task: &'static dyn NavigationTask,
    //     params_ptr: usize,
    // ) -> JobId {
    //     let job_id = self.coordinator.next_job_id();
    //     let initial_frontier = task.initial_frontier(params_ptr, &self.shared_engine.graph);
    //     let node = self.new_node(job_id, task, params_ptr, initial_frontier);
    //     job_id
    // }

    /// Register a worker for `job_id` seeded with `initial_frontier`.
    pub fn new_worker(
        &self,
        job_id: JobId,
        task: &'static dyn NavigationTask,
        params_ptr: usize,
        initial_frontier: Vec<NodeFrontierItem>,
    ) {
        let worker = Arc::new(Mutex::new(NavigationWorker::new(
            job_id,
            task,
            params_ptr,
            initial_frontier,
            &self.shared_engine,
        )));
        self.active_tasks.insert(job_id, worker.clone());
    }

    /// Run one BSP navigation step for a worker:
    /// 1. expand every frontier item, measuring candidate vertices in batches;
    /// 2. run a final selection over the per-batch winners;
    /// 3. navigate from each selected vertex and stage the resulting frontier
    ///    candidates for the coordinator to fetch and distribute.
    pub async fn worker_navigate_step(
        &self,
        node: &mut NavigationWorker,
    ) -> HashMap<Id, NavigationResult> {
        let batch_size = node.task.batch_size(node);
        let mut selected_pool = Vec::with_capacity(batch_size);
        let mut batch = Vec::with_capacity(batch_size);
        let engine = &self.shared_engine.graph;
        while let Some(mut frontier_item) = node.current_frontier.pop() {
            while let Some(opposite_id) = frontier_item.opposite_ids.pop() {
                // Missing or unreadable vertices are silently skipped.
                if let Ok(Some(vertex)) = self.shared_engine.graph.vertex_by(opposite_id).await {
                    batch.push(vertex);
                }
                if batch.len() == batch_size {
                    let measurements = match node.task.measure_vertices(engine, batch, node) {
                        Ok(measurements) => measurements,
                        Err(e) => {
                            // Report the failure under the unit id sentinel.
                            return HashMap::from([(Id::unit_id(), NavigationResult::Error(e))])
                        }
                    };
                    let candidate = select_measured_vertex(measurements, node);
                    selected_pool.extend(
                        candidate
                            .into_iter()
                            .filter(|(v, _)| !v.cell.id().is_unit_id()),
                    );
                    batch = Vec::with_capacity(batch_size);
                }
            }
        }
        // Flush the final, partially filled batch.
        if !batch.is_empty() {
            let measurements = match node.task.measure_vertices(engine, batch, node) {
                Ok(measurements) => measurements,
                Err(e) => return HashMap::from([(Id::unit_id(), NavigationResult::Error(e))]),
            };
            let candidate = select_measured_vertex(measurements, node);
            selected_pool.extend(
                candidate
                    .into_iter()
                    .filter(|(v, _)| !v.cell.id().is_unit_id()),
            );
        }
        let selected = if selected_pool.len() > 1 {
            select_measured_vertex(selected_pool, node)
        } else if selected_pool.len() == 1 {
            selected_pool
        } else {
            // Nothing survived measurement: the navigation has converged.
            node.frontier_candidates.clear();
            return HashMap::new();
        };
        let mut frontier_candidates = Vec::new();
        let mut all_results = HashMap::new();
        for (vertex, distance) in selected {
            let vertex_id = vertex.cell.id();
            let (result, opposite_ids) = node.task.navigate_vertex(vertex, distance, node).await;
            debug_assert!(distance >= 0.0); // Should not have negative distance
            frontier_candidates.push(NodeFrontierItem {
                vertex_id,
                distance,
                opposite_ids,
            });
            all_results.insert(vertex_id, result);
        }
        // Save the opposite ids for the coordinator to fetch and distribute.
        node.frontier_candidates = frontier_candidates;
        all_results
    }
}

/// Let the task pick the surviving (vertex, distance) pairs out of
/// `measurements`; 0 or 1 entries pass through unchanged.
fn select_measured_vertex(
    measurements: Vec<(Vertex, Distance)>,
    node: &NavigationWorker,
) -> Vec<(Vertex, Distance)> {
    // Avoid any extra allocation for the trivial cases.
    if measurements.len() <= 1 {
        return measurements;
    }

    // Build (id, distance) pairs for the task plus an id-keyed vertex list in
    // a single pass, pre-sized to avoid resizing.
    let mut indexed_measurements = Vec::with_capacity(measurements.len());
    let mut vertex_map = Vec::with_capacity(measurements.len());
    for (vertex, distance) in measurements {
        let vid = vertex.cell.id();
        indexed_measurements.push((vid, distance));
        vertex_map.push((vid, (vertex, distance)));
    }

    let selected_ids = node
        .task
        .select_measured(indexed_measurements, node.params_ptr);
    // HashSet for O(1) membership checks while filtering.
    let selected_set: std::collections::HashSet<_> =
        selected_ids.into_iter().map(|(vid, _)| vid).collect();

    vertex_map
        .into_iter()
        .filter(|(vid, _)| selected_set.contains(vid))
        .map(|(_, vertex_data)| vertex_data)
        .collect()
}

// ==== file: src/traversal/navigation/job.rs ====
use super::*;

#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct JobId {
    pub coordinator_server_id: u64,
    pub coordinator_job_id: u64,
}

// NOTE(review): channel item generics were stripped by the packing tool;
// reconstructed from the surrounding step/frontier types — TODO confirm.
pub struct NavigationJob {
    pub job_id: JobId,
    pub frontier_rx: Receiver<NodeFrontier>,
    pub step_tx: Sender<NavigationStep>,
}

pub struct NextStep {
    pub vertex_id: Id,
    pub distance: f64,
    pub opposite_ids: Vec<Id>,
    pub metadata: Vec<u8>,
}

pub enum NavigationStep {
    NextStep(NextStep),
    Terminate,
    Finished,
}

// ==== file: src/traversal/navigation/mod.rs ====
use std::{
    future::Future,
    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
};

use dovahkiin::types::Id;
use futures::future::BoxFuture;
use lightning::map::{Map, PtrHashMap};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::runtime::{self, Runtime};

use crate::{
    // apps::hnsw::navigate_task::HnswTraversalTask,
    graph::{vertex::Vertex, GraphEngine},
};
use tokio::sync::mpsc::*;

pub type Distance = f32;

pub mod apps;
pub mod coordinator;
pub mod engine;
pub mod server;
pub mod task;
pub mod worker;

pub use apps::*;
pub use coordinator::*;
pub use engine::*;
pub use server::*;
pub use task::*;
pub use worker::*;

const DS_CAPACITY: usize = 1024;

/// One frontier entry: a measured vertex plus the neighbor ids to expand next.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeFrontierItem {
    pub vertex_id: Id,
    pub distance: Distance,
    pub opposite_ids: Vec<Id>,
}

// ==== file: src/traversal/navigation/server.rs ====
use std::collections::HashMap;

use crate::{
    apps::hnsw,
    job::{JobId, JobReport},
};

use super::*;
use bifrost::{
    conshash::ConsistentHashing, dispatch_rpc_service_functions, rpc::*, service, service_with_id,
};

/// Frontier snapshot a worker hands back to the coordinator, together with
/// task-encoded state updates.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct NodeFrontier {
    pub items: Vec<NodeFrontierItem>,
    pub metadata: Vec<u8>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub
enum NodeStep { 21 | Step(Distance), 22 | Terminate, 23 | Finished, 24 | } 25 | 26 | #[derive(Debug, Clone, Serialize, Deserialize)] 27 | pub enum NavigationResult { 28 | Continue(Distance), 29 | Found, 30 | NotFound, 31 | Error(String), 32 | NA, 33 | } 34 | 35 | pub static DEFAULT_SERVICE_ID: u64 = hash_ident!(MORPHEUS_NAVIGATION_RPC_SERVICE) as u64; 36 | 37 | service! { 38 | // For coordinator 39 | rpc new_job(app: App, config: Vec) -> Result; 40 | rpc start_job(job_id: JobId) -> Result<(), String>; 41 | rpc job_report(job_id: JobId) -> Result; 42 | rpc stop_job(job_id: JobId) -> Result<(), String>; 43 | // For nodes 44 | rpc worker_new_task(job_id: JobId, app: App, initial_frontier: Vec, config: Vec); 45 | rpc worker_step(job_id: JobId) -> HashMap; 46 | rpc worker_frontier(job_id: JobId) -> NodeFrontier; 47 | rpc worker_new_frontier(job_id: JobId, frontier: NodeFrontier); 48 | rpc worker_terminate(job_id: JobId); 49 | rpc worker_findings(job_id: JobId) -> Result, String>; 50 | } 51 | 52 | pub struct NavigationService { 53 | engine: Arc, 54 | } 55 | 56 | impl Service for NavigationService { 57 | fn new_job<'a>( 58 | &'a self, 59 | app: App, 60 | config: Vec, 61 | ) -> futures::future::BoxFuture<'a, Result> { 62 | async move { 63 | let job_id = self.engine.coordinator.create_job(app, config); 64 | self.engine.coordinator.initialize_job(job_id).await?; 65 | return Ok(job_id); 66 | } 67 | .boxed() 68 | } 69 | 70 | fn start_job<'a>( 71 | &'a self, 72 | job_id: JobId, 73 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 74 | async move { 75 | // Get the params pointer from the active task, cannot get it from the coordinator 76 | let params_ptr = self 77 | .engine 78 | .active_tasks 79 | .get(&job_id) 80 | .unwrap() 81 | .lock() 82 | .await 83 | .params_ptr; 84 | self.engine.coordinator.start_job(job_id, params_ptr).await 85 | } 86 | .boxed() 87 | } 88 | 89 | fn job_report<'a>( 90 | &'a self, 91 | job_id: JobId, 92 | ) -> ::futures::future::BoxFuture<'a, 
Result> { 93 | self.engine.coordinator.job_report(job_id).boxed() 94 | } 95 | 96 | fn stop_job<'a>( 97 | &'a self, 98 | job_id: JobId, 99 | ) -> ::futures::future::BoxFuture<'a, Result<(), String>> { 100 | self.engine.coordinator.stop_job(job_id).boxed() 101 | } 102 | //////////////////////////////////////////////////////////////// 103 | 104 | fn worker_new_task<'a>( 105 | &'a self, 106 | job_id: JobId, 107 | app: App, 108 | initial_frontier: Vec, 109 | config: Vec, 110 | ) -> futures::future::BoxFuture<'a, ()> { 111 | let task = app.to_task(); 112 | let params_ptr = task.params_from_config(&config); 113 | let initial_frontier = vec![NodeFrontierItem { 114 | vertex_id: Id::unit_id(), 115 | distance: 0.0, 116 | opposite_ids: initial_frontier, 117 | }]; 118 | self.engine 119 | .new_worker(job_id, task, params_ptr, initial_frontier); 120 | Box::pin(future::ready(())) 121 | } 122 | 123 | fn worker_step<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, HashMap> { 124 | async move { 125 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 126 | let mut workder_guard = worker.lock().await; 127 | let result = self.engine.worker_navigate_step(&mut workder_guard).await; 128 | return result; 129 | } 130 | .boxed() 131 | } 132 | 133 | fn worker_frontier<'a>(&'a self, job_id: JobId) -> BoxFuture<'a, NodeFrontier> { 134 | async move { 135 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 136 | let worker_guard = worker.lock().await; 137 | let frontier_items = worker_guard.frontier_candidates.clone(); 138 | let updates = worker_guard.task.encode_states(&*worker_guard); 139 | return NodeFrontier { 140 | items: frontier_items, 141 | metadata: updates, 142 | }; 143 | } 144 | .boxed() 145 | } 146 | 147 | fn worker_new_frontier<'a>( 148 | &'a self, 149 | job_id: JobId, 150 | frontier: NodeFrontier, 151 | ) -> BoxFuture<'a, ()> { 152 | async move { 153 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 154 | let mut worker_guard = worker.lock().await; 155 
| worker_guard.current_frontier = frontier.items; 156 | worker_guard 157 | .task 158 | .update_states(&*worker_guard, &frontier.metadata); 159 | } 160 | .boxed() 161 | } 162 | 163 | fn worker_terminate<'a>(&'a self, job_id: JobId) -> ::futures::future::BoxFuture<'a, ()> { 164 | async move { 165 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 166 | let worker_guard = worker.lock().await; 167 | worker_guard.task.cleanup(&*worker_guard); 168 | self.engine.active_tasks.remove(&job_id); 169 | } 170 | .boxed() 171 | } 172 | 173 | fn worker_findings<'a>( 174 | &'a self, 175 | job_id: JobId, 176 | ) -> ::futures::future::BoxFuture<'a, Result, String>> { 177 | async move { 178 | let worker = self.engine.active_tasks.get(&job_id).unwrap(); 179 | let worker_guard = worker.lock().await; 180 | return worker_guard.task.findings(&*worker_guard).await; 181 | } 182 | .boxed() 183 | } 184 | } 185 | 186 | impl NavigationService { 187 | pub fn new( 188 | server_id: u64, 189 | graph: &Arc, 190 | conshash: &Arc, 191 | ) -> Arc { 192 | let engine = Arc::new(NavigationEngine::new(server_id, graph, conshash)); 193 | Arc::new(Self { engine }) 194 | } 195 | } 196 | 197 | dispatch_rpc_service_functions!(NavigationService); 198 | service_with_id!(NavigationService, DEFAULT_SERVICE_ID); 199 | -------------------------------------------------------------------------------- /src/traversal/navigation/task.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | pub trait NavigationTask: Send + Sync { 4 | fn measure_vertices<'a>( 5 | &'a self, 6 | engine: &Arc, 7 | vertices: Vec, 8 | node: &'a NavigationWorker, 9 | ) -> Result, String>; 10 | fn select_measured<'a>( 11 | &'a self, 12 | vertices: Vec<(Id, Distance)>, 13 | params_ptr: usize, 14 | ) -> Vec<(Id, Distance)>; 15 | fn navigate_vertex<'a>( 16 | &'a self, 17 | vertex: Vertex, 18 | distance: Distance, 19 | node: &'a NavigationWorker, 20 | ) -> BoxFuture<'a, 
(NavigationResult, Vec)>; 21 | fn cleanup(&self, node: &NavigationWorker); 22 | fn batch_size(&self, node: &NavigationWorker) -> usize; 23 | fn encode_states(&self, node: &NavigationWorker) -> Vec; 24 | fn update_states(&self, node: &NavigationWorker, states: &[u8]); 25 | fn params_from_config(&self, config: &Vec) -> usize; 26 | fn initial_frontier<'a>( 27 | &'a self, 28 | params_ptr: usize, 29 | graph: &'a Arc, 30 | ) -> BoxFuture<'a, Result, String>>; 31 | fn dispose(&self, node: &NavigationWorker); 32 | fn findings<'a>(&'a self, node: &'a NavigationWorker) 33 | -> BoxFuture<'a, Result, String>>; 34 | fn finallalize_findings( 35 | &self, 36 | params_ptr: usize, 37 | findings: &Vec<&Vec>, 38 | ) -> BoxFuture>; // on the coordinator 39 | fn aggregate_metadata(&self, params_ptr: usize, metadata: &Vec<&Vec>) 40 | -> BoxFuture>; // on the coordinator 41 | } 42 | -------------------------------------------------------------------------------- /src/traversal/navigation/worker.rs: -------------------------------------------------------------------------------- 1 | use crate::job::JobId; 2 | 3 | use super::*; 4 | 5 | pub struct NavigationWorker { 6 | pub job_id: JobId, 7 | pub current_frontier: Vec, 8 | pub frontier_candidates: Vec, 9 | pub task: &'static dyn NavigationTask, 10 | pub params_ptr: usize, 11 | shared_engine: Arc, 12 | } 13 | 14 | impl NavigationWorker { 15 | pub fn new( 16 | job_id: JobId, 17 | task: &'static dyn NavigationTask, 18 | params_ptr: usize, 19 | initial_frontier: Vec, 20 | shared_engine: &Arc, 21 | ) -> Self { 22 | Self { 23 | job_id, 24 | task, 25 | params_ptr, 26 | current_frontier: initial_frontier, 27 | frontier_candidates: vec![], 28 | shared_engine: shared_engine.clone(), 29 | } 30 | } 31 | 32 | pub fn graph(&self) -> &Arc { 33 | &self.shared_engine.graph 34 | } 35 | } 36 | 37 | impl Drop for NavigationWorker { 38 | fn drop(&mut self) { 39 | self.task.dispose(self); 40 | } 41 | } 42 | 43 | unsafe impl Send for NavigationWorker {} 44 | 
unsafe impl Sync for NavigationWorker {} 45 | -------------------------------------------------------------------------------- /src/utils/bloom_filter.rs: -------------------------------------------------------------------------------- 1 | use ahash::RandomState; 2 | use serde::{Deserialize, Serialize}; 3 | use std::hash::{BuildHasher, Hash, Hasher}; 4 | 5 | /// Seeds for the two hash functions used in the Bloom filter. 6 | /// These are arbitrary but fixed values to ensure consistent hashing. 7 | const HASH_SEED_1: usize = 0x1234_5678_9ABC_DEF0; 8 | const HASH_SEED_2: usize = 0xFEDC_BA98_7654_3210; 9 | 10 | /// A simple Bloom filter implementation. 11 | /// 12 | /// A Bloom filter is a space-efficient probabilistic data structure that is used to test 13 | /// whether an element is a member of a set. False positives are possible, but false negatives are not. 14 | /// 15 | /// This implementation uses ahash for fast hashing and bitwise operations for efficient storage. 16 | /// The size is always rounded up to the next power of 2 for efficient bit operations. 17 | #[derive(Clone, Debug, Serialize, Deserialize)] 18 | pub struct BloomFilter { 19 | bits: Vec, 20 | num_hashes: usize, 21 | size: usize, 22 | size_mask: usize, // Mask for bit operations instead of modulo 23 | } 24 | 25 | impl BloomFilter { 26 | /// Creates a new Bloom filter with the specified size (in bytes) and number of hash functions. 27 | /// Size will be rounded up to the next power of 2 for efficient operations. 
28 | pub fn new(size_in_bytes: usize, num_hashes: usize) -> Self { 29 | // Round up to the next power of 2 30 | let size_in_bytes = Self::next_power_of_two(size_in_bytes); 31 | let size = size_in_bytes * 8; // Convert bytes to bits 32 | 33 | BloomFilter { 34 | bits: vec![0; size_in_bytes], 35 | num_hashes, 36 | size, 37 | size_mask: size - 1, // For efficient modulo with bitwise AND 38 | } 39 | } 40 | 41 | /// Creates a new Bloom filter with optimal size and hash count for the expected number of elements 42 | /// and desired false positive probability. 43 | pub fn with_rate(expected_elements: usize, false_positive_rate: f64) -> Self { 44 | // Calculate optimal size (in bits) 45 | let mut size = Self::optimal_size(expected_elements, false_positive_rate); 46 | // Round up to the next power of 2 47 | size = Self::next_power_of_two(size / 8) * 8; 48 | 49 | // Calculate optimal number of hash functions 50 | let num_hashes = Self::optimal_hashes(size, expected_elements); 51 | 52 | BloomFilter { 53 | bits: vec![0; size >> 3], // Convert bits to bytes with right shift 54 | num_hashes, 55 | size, 56 | size_mask: size - 1, 57 | } 58 | } 59 | 60 | /// Rounds up to the next power of 2 61 | fn next_power_of_two(n: usize) -> usize { 62 | n.next_power_of_two() 63 | } 64 | 65 | /// Calculates the optimal size in bits for the given parameters 66 | fn optimal_size(expected_elements: usize, false_positive_rate: f64) -> usize { 67 | let size = 68 | -((expected_elements as f64) * false_positive_rate.ln()) / (2.0_f64.ln().powi(2)); 69 | size.ceil() as usize 70 | } 71 | 72 | /// Calculates the optimal number of hash functions for the given parameters 73 | fn optimal_hashes(size: usize, expected_elements: usize) -> usize { 74 | let hashes = (size as f64 / expected_elements as f64) * 2.0_f64.ln(); 75 | hashes.ceil() as usize 76 | } 77 | 78 | /// Inserts an element into the Bloom filter. 
79 | pub fn insert(&mut self, item: &T) { 80 | // Use two different hash builders with different seeds 81 | let hash_builder1 = RandomState::with_seed(HASH_SEED_1); 82 | let hash_builder2 = RandomState::with_seed(HASH_SEED_2); 83 | 84 | let mut hasher1 = hash_builder1.build_hasher(); 85 | let mut hasher2 = hash_builder2.build_hasher(); 86 | 87 | item.hash(&mut hasher1); 88 | item.hash(&mut hasher2); 89 | 90 | let hash1 = hasher1.finish(); 91 | let hash2 = hasher2.finish(); 92 | 93 | for i in 0..self.num_hashes { 94 | // Use double hashing to generate multiple hash values 95 | let combined_hash = hash1.wrapping_add(i as u64).wrapping_mul(hash2); 96 | let index = (combined_hash as usize) & self.size_mask; 97 | let byte_index = index >> 3; // Equivalent to index / 8 98 | let bit_index = index & 0x7; // Equivalent to index % 8 99 | self.bits[byte_index] |= 1 << bit_index; 100 | } 101 | } 102 | 103 | /// Checks if an element might be in the Bloom filter. 104 | /// Returns true if the element might be in the set, false if it definitely is not. 
105 | pub fn contains(&self, item: &T) -> bool { 106 | // Use the same hash builders as in insert 107 | let hash_builder1 = RandomState::with_seed(HASH_SEED_1); 108 | let hash_builder2 = RandomState::with_seed(HASH_SEED_2); 109 | 110 | let mut hasher1 = hash_builder1.build_hasher(); 111 | let mut hasher2 = hash_builder2.build_hasher(); 112 | 113 | item.hash(&mut hasher1); 114 | item.hash(&mut hasher2); 115 | 116 | let hash1 = hasher1.finish(); 117 | let hash2 = hasher2.finish(); 118 | 119 | for i in 0..self.num_hashes { 120 | // Use double hashing to generate multiple hash values 121 | let combined_hash = hash1.wrapping_add(i as u64).wrapping_mul(hash2); 122 | let index = (combined_hash as usize) & self.size_mask; 123 | let byte_index = index >> 3; // Equivalent to index / 8 124 | let bit_index = index & 0x7; // Equivalent to index % 8 125 | if (self.bits[byte_index] & (1 << bit_index)) == 0 { 126 | return false; 127 | } 128 | } 129 | true 130 | } 131 | 132 | /// Clears the Bloom filter, removing all elements. 133 | pub fn clear(&mut self) { 134 | self.bits.fill(0); 135 | } 136 | 137 | /// Returns the approximate number of elements in the Bloom filter. 
138 | pub fn approximate_count(&self) -> usize { 139 | let m = self.size as f64; 140 | let k = self.num_hashes as f64; 141 | let x = self.count_set_bits() as f64; 142 | 143 | let estimate = -(m / k) * (1.0 - x / m).ln(); 144 | estimate.round() as usize 145 | } 146 | 147 | /// Counts the number of bits set to 1 in the filter 148 | fn count_set_bits(&self) -> usize { 149 | self.bits 150 | .iter() 151 | .map(|&byte| byte.count_ones() as usize) 152 | .sum() 153 | } 154 | 155 | /// Returns the size of the Bloom filter in bits 156 | pub fn size(&self) -> usize { 157 | self.size 158 | } 159 | 160 | /// Returns the number of hash functions used 161 | pub fn num_hashes(&self) -> usize { 162 | self.num_hashes 163 | } 164 | 165 | /// Serializes the Bloom filter to a byte vector efficiently 166 | pub fn to_bytes(&self) -> Vec { 167 | bincode::serialize(self).unwrap_or_default() 168 | } 169 | 170 | /// Deserializes a Bloom filter from a byte vector 171 | pub fn from_bytes(bytes: &[u8]) -> Result { 172 | bincode::deserialize(bytes) 173 | } 174 | } 175 | 176 | #[cfg(test)] 177 | mod tests { 178 | use super::*; 179 | 180 | #[test] 181 | fn test_bloom_filter_basic() { 182 | let mut filter = BloomFilter::new(128, 3); 183 | 184 | // Insert some elements 185 | filter.insert(&"apple"); 186 | filter.insert(&"banana"); 187 | filter.insert(&"cherry"); 188 | 189 | // These should be found 190 | assert!(filter.contains(&"apple")); 191 | assert!(filter.contains(&"banana")); 192 | assert!(filter.contains(&"cherry")); 193 | 194 | // This should not be found 195 | assert!(!filter.contains(&"durian")); 196 | 197 | // Clear the filter 198 | filter.clear(); 199 | assert!(!filter.contains(&"apple")); 200 | } 201 | 202 | #[test] 203 | fn test_bloom_filter_with_rate() { 204 | let filter = BloomFilter::with_rate(1000, 0.01); 205 | assert!(filter.size() > 0); 206 | assert!(filter.num_hashes() > 0); 207 | 208 | // Verify the size is a power of 2 209 | assert_eq!(filter.size() & (filter.size() - 1), 0); 
210 | } 211 | 212 | #[test] 213 | fn test_bloom_filter_serialization() { 214 | let mut filter = BloomFilter::new(128, 3); 215 | filter.insert(&"apple"); 216 | filter.insert(&"banana"); 217 | 218 | let bytes = filter.to_bytes(); 219 | let deserialized = BloomFilter::from_bytes(&bytes).unwrap(); 220 | 221 | assert!(deserialized.contains(&"apple")); 222 | assert!(deserialized.contains(&"banana")); 223 | assert!(!deserialized.contains(&"cherry")); 224 | } 225 | 226 | #[test] 227 | fn test_bloom_filter_approximate_count() { 228 | let mut filter = BloomFilter::new(128, 3); 229 | 230 | // Insert 100 elements 231 | for i in 0..100 { 232 | filter.insert(&i); 233 | } 234 | 235 | let count = filter.approximate_count(); 236 | // The count should be reasonably close to 100 237 | assert!(count >= 90 && count <= 110); // Allow for more variance 238 | } 239 | 240 | #[test] 241 | fn test_bloom_filter_false_positives() { 242 | let mut filter = BloomFilter::new(128, 3); 243 | 244 | // Insert some elements 245 | for i in 0..50 { 246 | filter.insert(&i); 247 | } 248 | 249 | // Check for non-existent elements 250 | let mut false_positives = 0; 251 | for i in 100..200 { 252 | if filter.contains(&i) { 253 | false_positives += 1; 254 | } 255 | } 256 | 257 | // False positive rate should be reasonable 258 | assert!(false_positives < 5); // Less than 5% false positives 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /src/utils/file.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io; 3 | use std::io::prelude::*; 4 | 5 | pub fn slurp<'a>(file: &'a str) -> io::Result { 6 | let mut file = File::open(file)?; 7 | let mut contents = String::new(); 8 | file.read_to_string(&mut contents)?; 9 | return Ok(contents); 10 | } 11 | -------------------------------------------------------------------------------- /src/utils/mod.rs: 
use std::mem::MaybeUninit;
use std::ops::{Index, IndexMut};

/// A fixed-size ring buffer that overwrites the oldest elements when full.
///
/// The capacity is specified at creation time and must be a power of 2 so
/// that wrap-around indexing can use a bit mask instead of a modulo.
pub struct RingBuffer<T> {
    /// Backing storage; slots outside the live window are uninitialized.
    buffer: Vec<MaybeUninit<T>>,
    /// Position where the next element will be written.
    write_pos: usize,
    /// Number of initialized (live) elements.
    count: usize,
    /// `capacity - 1`, for efficient wrap-around (`& mask`).
    mask: usize,
}

impl<T> RingBuffer<T> {
    /// Creates a new empty ring buffer with the specified capacity.
    ///
    /// # Panics
    /// Panics if `cap` is not a power of 2.
    pub fn new(cap: usize) -> Self {
        assert!(cap.is_power_of_two(), "Buffer size must be a power of 2");

        // Safe construction: `MaybeUninit::uninit()` slots need no `unsafe`
        // (the original reached for `Vec::set_len` unnecessarily).
        let buffer: Vec<MaybeUninit<T>> = (0..cap).map(|_| MaybeUninit::uninit()).collect();

        Self {
            buffer,
            write_pos: 0,
            count: 0,
            mask: cap - 1,
        }
    }

    /// Physical slot index of the `index`-th oldest live element.
    fn slot(&self, index: usize) -> usize {
        (self.write_pos + self.buffer.len() - self.count + index) & self.mask
    }

    /// Adds an element, overwriting (and dropping) the oldest element if full.
    pub fn push(&mut self, item: T) {
        let capacity = self.buffer.len();

        if self.count == capacity {
            // When full, the oldest element lives exactly in the slot we are
            // about to overwrite: (write_pos + cap - count) & mask == write_pos.
            // Drop it first so it is not leaked.
            // SAFETY: the buffer is full, so this slot is initialized.
            unsafe {
                std::ptr::drop_in_place(self.buffer[self.write_pos].as_mut_ptr());
            }
        }

        self.buffer[self.write_pos] = MaybeUninit::new(item);
        self.write_pos = (self.write_pos + 1) & self.mask;
        if self.count < capacity {
            self.count += 1;
        }
    }

    /// Returns a reference to the `index`-th oldest element, if it exists.
    pub fn get(&self, index: usize) -> Option<&T> {
        if index >= self.count {
            return None;
        }
        let slot = self.slot(index);
        // SAFETY: index < count, so this slot holds an initialized element.
        Some(unsafe { &*self.buffer[slot].as_ptr() })
    }

    /// Returns a mutable reference to the `index`-th oldest element, if it exists.
    pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
        if index >= self.count {
            return None;
        }
        let slot = self.slot(index);
        // SAFETY: index < count, so this slot holds an initialized element.
        Some(unsafe { &mut *self.buffer[slot].as_mut_ptr() })
    }

    /// Returns the number of elements currently in the buffer.
    pub fn len(&self) -> usize {
        self.count
    }

    /// Returns true if the buffer is empty.
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Returns true if the buffer is full.
    pub fn is_full(&self) -> bool {
        self.count == self.buffer.len()
    }

    /// Clears the buffer, dropping all live elements.
    pub fn clear(&mut self) {
        for i in 0..self.count {
            let slot = self.slot(i);
            // SAFETY: every slot for indices 0..count is initialized.
            unsafe {
                std::ptr::drop_in_place(self.buffer[slot].as_mut_ptr());
            }
        }
        self.write_pos = 0;
        self.count = 0;
    }

    /// Returns an iterator over the elements, oldest to newest.
    pub fn iter(&self) -> impl Iterator<Item = &T> {
        (0..self.count).filter_map(move |i| self.get(i))
    }

    /// Pushes every item from `items`, overwriting oldest elements as needed.
    pub fn extend(&mut self, items: impl IntoIterator<Item = T>) {
        for item in items {
            self.push(item);
        }
    }

    /// Creates a buffer of capacity `cap` pre-filled from `items`.
    pub fn new_extend(items: impl IntoIterator<Item = T>, cap: usize) -> Self {
        let mut buffer = Self::new(cap);
        buffer.extend(items);
        buffer
    }

    /// Returns the newest element, or `None` if the buffer is empty.
    ///
    /// BUG FIX: the original computed `self.get(self.count - 1)`, which
    /// underflows `usize` (panicking in debug builds) when the buffer is empty.
    pub fn last(&self) -> Option<&T> {
        self.count.checked_sub(1).and_then(|i| self.get(i))
    }

    /// Returns the fixed capacity of the buffer.
    pub fn capacity(&self) -> usize {
        self.buffer.len()
    }

    /// Merges two ring buffers into a new one, sorted ascending by `compare`.
    ///
    /// The result has the same capacity as `self` and keeps at most that many
    /// of the smallest elements (sort is stable; `self`'s elements come first
    /// among equals).
    pub fn merge_sorted<F>(&self, other: &RingBuffer<T>, compare: F) -> RingBuffer<T>
    where
        F: Fn(&T, &T) -> std::cmp::Ordering,
        T: Clone,
    {
        let cap = self.buffer.len();

        // Collect elements from both buffers, then order them.
        let mut all: Vec<T> = Vec::with_capacity(self.count + other.count);
        all.extend(self.iter().cloned());
        all.extend(other.iter().cloned());
        all.sort_by(compare);

        // Keep at most `cap` elements (take(n) on a short iterator is a no-op,
        // so no explicit guard is needed).
        let mut merged = RingBuffer::new(cap);
        merged.extend(all.into_iter().take(cap));
        merged
    }

    /// Merges two ring buffers using the natural ordering of `T`.
    ///
    /// `other` may hold a different element type `T2` convertible into `T`.
    /// The result has the same capacity as `self` and keeps at most that many
    /// of the smallest elements.
    pub fn merge<T2>(&self, other: &RingBuffer<T2>) -> RingBuffer<T>
    where
        T: Clone + Ord,
        T2: Clone + Into<T>,
    {
        let cap = self.buffer.len();

        let mut all: Vec<T> = Vec::with_capacity(self.count + other.count);
        all.extend(self.iter().cloned());
        all.extend(other.iter().cloned().map(Into::into));
        all.sort();

        let mut merged = RingBuffer::new(cap);
        merged.extend(all.into_iter().take(cap));
        merged
    }
}

impl<T: Clone> RingBuffer<T> {
    /// Collects the live elements, oldest to newest, into a `Vec`.
    pub fn to_vec(&self) -> Vec<T> {
        self.iter().cloned().collect()
    }
}

impl<T: Copy> RingBuffer<T> {
    /// Pushes every element of `items` (by copy).
    pub fn extend_from_slice(&mut self, items: &[T]) {
        for &item in items {
            self.push(item);
        }
    }
}

impl<T> Index<usize> for RingBuffer<T> {
    type Output = T;

    fn index(&self, index: usize) -> &Self::Output {
        self.get(index).expect("Index out of bounds")
    }
}

impl<T> IndexMut<usize> for RingBuffer<T> {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        self.get_mut(index).expect("Index out of bounds")
    }
}

impl<T> Default for RingBuffer<T> {
    fn default() -> Self {
        Self::new(16) // Default capacity of 16
    }
}

impl<T> Drop for RingBuffer<T> {
    fn drop(&mut self) {
        // Elements live in MaybeUninit slots, so they must be dropped manually.
        self.clear();
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_buffer_is_empty() {
        let buffer: RingBuffer<i32> = RingBuffer::new(8);
        assert!(buffer.is_empty());
        assert_eq!(buffer.len(), 0);
        assert!(!buffer.is_full());
    }

    #[test]
    #[should_panic(expected = "Buffer size must be a power of 2")]
    fn test_non_power_of_two_size() {
        let _: RingBuffer<i32> = RingBuffer::new(3);
    }

    #[test]
    fn test_push_and_get() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);

        assert_eq!(buffer.get(0), Some(&1));
        assert_eq!(buffer.get(1), Some(&2));
        assert_eq!(buffer.get(2), None);

        assert_eq!(buffer.len(), 2);
        assert!(!buffer.is_empty());
        assert!(!buffer.is_full());
    }

    #[test]
    fn test_overwrite_when_full() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);
        buffer.push(3);
        buffer.push(4);
        assert!(buffer.is_full());

        // This should overwrite the oldest element (1)
        buffer.push(5);

        assert_eq!(buffer.get(0), Some(&2));
        assert_eq!(buffer.get(1), Some(&3));
        assert_eq!(buffer.get(2), Some(&4));
        assert_eq!(buffer.get(3), Some(&5));
        assert_eq!(buffer.len(), 4);
    }

    #[test]
    fn test_index_operator() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);

        assert_eq!(buffer[0], 1);
        assert_eq!(buffer[1], 2);
    }

    #[test]
    #[should_panic(expected = "Index out of bounds")]
    fn test_index_out_of_bounds() {
        let buffer: RingBuffer<i32> = RingBuffer::new(4);
        let _ = buffer[0]; // This should panic
    }

    #[test]
    fn test_clear() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);
        buffer.clear();

        assert!(buffer.is_empty());
        assert_eq!(buffer.len(), 0);
        assert_eq!(buffer.get(0), None);
    }

    #[test]
    fn test_last_on_empty_buffer() {
        // Regression test for the `count - 1` underflow in `last()`.
        let buffer: RingBuffer<i32> = RingBuffer::new(4);
        assert_eq!(buffer.last(), None);
    }

    #[test]
    fn test_iter() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);
        buffer.push(3);

        let collected: Vec<i32> = buffer.iter().cloned().collect();
        assert_eq!(collected, vec![1, 2, 3]);
    }

    #[test]
    fn test_get_mut() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        buffer.push(1);
        buffer.push(2);

        if let Some(val) = buffer.get_mut(0) {
            *val = 10;
        }

        assert_eq!(buffer.get(0), Some(&10));
        assert_eq!(buffer.get(1), Some(&2));
    }

    #[test]
    fn test_wrap_around() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(4);

        // Fill the buffer
        for i in 1..=4 {
            buffer.push(i);
        }

        // Push more elements to test wrap-around
        buffer.push(5);
        buffer.push(6);

        // Check that the oldest elements were overwritten
        assert_eq!(buffer.get(0), Some(&3));
        assert_eq!(buffer.get(1), Some(&4));
        assert_eq!(buffer.get(2), Some(&5));
        assert_eq!(buffer.get(3), Some(&6));
    }

    #[test]
    fn test_bit_masking() {
        let mut buffer: RingBuffer<i32> = RingBuffer::new(8);

        // Fill the buffer and then some
        for i in 0..12 {
            buffer.push(i);
        }

        // The buffer should contain the last 8 elements (4-11)
        for i in 0..8 {
            assert_eq!(buffer.get(i), Some(&(i as i32 + 4)));
        }
    }

    #[test]
    fn test_merge_sorted() {
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i32> = RingBuffer::new(4);

        buffer1.push(1);
        buffer1.push(3);
        buffer1.push(5);

        buffer2.push(2);
        buffer2.push(4);
        buffer2.push(6);

        let merged = buffer1.merge_sorted(&buffer2, |a, b| a.cmp(b));

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&1));
        assert_eq!(merged.get(1), Some(&2));
        assert_eq!(merged.get(2), Some(&3));
        assert_eq!(merged.get(3), Some(&4));
    }

    #[test]
    fn test_merge() {
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i32> = RingBuffer::new(4);

        buffer1.push(5);
        buffer1.push(3);
        buffer1.push(1);

        buffer2.push(6);
        buffer2.push(4);
        buffer2.push(2);

        let merged = buffer1.merge(&buffer2);

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&1));
        assert_eq!(merged.get(1), Some(&2));
        assert_eq!(merged.get(2), Some(&3));
        assert_eq!(merged.get(3), Some(&4));
    }

    #[test]
    fn test_merge_with_conversion() {
        // Unlike the original copy of this test, use a genuinely different
        // element type so the `T2: Into<T>` path is actually exercised.
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i16> = RingBuffer::new(4);

        buffer1.push(5);
        buffer1.push(3);
        buffer1.push(1);

        buffer2.push(6);
        buffer2.push(4);
        buffer2.push(2);

        let merged = buffer1.merge(&buffer2);

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&1));
        assert_eq!(merged.get(1), Some(&2));
        assert_eq!(merged.get(2), Some(&3));
        assert_eq!(merged.get(3), Some(&4));
    }

    #[test]
    fn test_merge_sorted_custom_comparison() {
        let mut buffer1: RingBuffer<i32> = RingBuffer::new(4);
        let mut buffer2: RingBuffer<i32> = RingBuffer::new(4);

        buffer1.push(1);
        buffer1.push(3);
        buffer1.push(5);

        buffer2.push(2);
        buffer2.push(4);
        buffer2.push(6);

        // Reverse order comparison
        let merged = buffer1.merge_sorted(&buffer2, |a, b| b.cmp(a));

        assert_eq!(merged.count, 4); // Limited by capacity
        assert_eq!(merged.get(0), Some(&6));
        assert_eq!(merged.get(1), Some(&5));
        assert_eq!(merged.get(2), Some(&4));
        assert_eq!(merged.get(3), Some(&3));
    }
}
-------------------------------------------------------------------------------- 1 | use dovahkiin::types::{Map, OwnedValue}; 2 | use neb::client::transaction::{Transaction, TxnError}; 3 | use neb::ram::types::Id; 4 | 5 | pub async fn set_map_by_key_id( 6 | txn: &Transaction, 7 | cell_id: Id, 8 | key_id: u64, 9 | value: OwnedValue, 10 | ) -> Result, TxnError> { 11 | match txn.read(cell_id).await? { 12 | Some(mut cell) => { 13 | if let &mut OwnedValue::Map(ref mut map) = &mut cell.data { 14 | map.insert_key_id(key_id, value); 15 | } else { 16 | return Ok(None); 17 | } 18 | txn.update(cell).await?; 19 | return Ok(Some(())); 20 | } 21 | None => Ok(None), 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tests/server.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | mod server; 2 | --------------------------------------------------------------------------------