├── .gitignore ├── Cargo.toml ├── benches └── long_string.rs ├── license.md ├── readme.md └── src ├── doc.rs ├── json.rs ├── json ├── sequence.rs ├── test.rs ├── tree.rs └── value.rs ├── lib.rs └── opset.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crudite" 3 | version = "0.1.0" 4 | authors = ["Robert Lord "] 5 | edition = "2018" 6 | license = "MIT" 7 | description = "A JSON CRDT" 8 | repository = "https://github.com/lord/crudite" 9 | 10 | [lib] 11 | bench = false 12 | 13 | [dependencies] 14 | uuid = "0.7" 15 | serde = { version = "1.0", features = ["derive"] } 16 | im = "13.0" 17 | 18 | [dev-dependencies] 19 | criterion = "0.3" 20 | 21 | [[bench]] 22 | name = "long_string" 23 | harness = false 24 | -------------------------------------------------------------------------------- /benches/long_string.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | 4 | use crudite::*; 5 | 6 | use criterion::black_box; 7 | use criterion::Criterion; 8 | 9 | fn criterion_benchmark(c: &mut Criterion) { 10 | c.bench_function("character insert", |b| { 11 | let mut doc = Doc::new(); 12 | doc.update(DocOp { 13 | timestamp: 1, 14 | edits: vec![ 15 | tree::Edit::TextCreate { id: Id { num: 1 } }, 16 | tree::Edit::MapInsert { 17 | parent: ROOT_ID, 18 | key: "my key".to_string(), 19 | item: tree::Value::Collection(Id { num: 1 }), 20 | }, 21 | ], 22 | }); 23 | 24 | let mut i = 2; 25 | b.iter(|| { 26 | doc.update(DocOp { 27 | timestamp: i as u64, 28 | edits: vec![black_box(tree::Edit::TextInsert { 29 | prev: Id { num: i - 1 }, 30 | id: Id { num: i }, 31 | character: if i % 2 == 0 { 'a' } else { 'b' }, 32 | })], 33 | }); 
34 | i += 1; 35 | }); 36 | black_box(doc); 37 | }); 38 | } 39 | 40 | criterion_group!(benches, criterion_benchmark); 41 | criterion_main!(benches); 42 | -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- 1 | Copyright 2019 Robert Lord 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # crudite 2 | 3 | CRDT library. 
4 | 5 | ## Requirements 6 | 7 | - performant even on massive data structures 8 | - allows syncing just part of a massive data structure 9 | - allows arbitrary data structures, with a structured tree-like representation 10 | - has a way of representing, inserting, and splicing text 11 | - bring-your-own network code, but CRDT algorithm does not require a central server 12 | 13 | ## notes 14 | 15 | starting with simple opset crdt, no splicing 16 | 17 | 18 | ## todo 19 | 20 | - [x] implement operation linearization 21 | - [x] add CRDT tests 22 | - [x] actual op/edit/crdt struct that combines opsets and jsontree 23 | - [x] add number type to json tree 24 | - [x] finish update fn for Edit 25 | - [x] cleanup code before arrays 26 | - [x] test for character deletion, make sure segments merge 27 | - [x] check case where segment can't merge but reaches zero length 28 | - [x] move segment code into separate module, test segments properly 29 | - [x] don't actually need to delete segments until we have garbage collection 30 | - [x] add array type to json tree 31 | - [x] moving objects needs some work; need to check for cycles, need to remove item from previous parent. maybe can combine this fix with the orphaning system. 32 | - [x] update tests to actually test all those new parent() fns 33 | - [x] finish upgrading tests to public Value APIs, add more Value methods, esp. for ergonomic string/list index access 34 | - [ ] system for constructing a StringRef or ObjectRef or ArrayRef in the first place 35 | - [ ] need to keep track of parents; you shouldn't need to delete to re-add, this makes it so multiple moves from the same place won't compose correctly. 36 | 37 | ## future work 38 | 39 | - [ ] bad IDs are currently ignored by `DocOp`'s `apply`. is this right? how can we prevent malicious reuse of ids? central server validation? 40 | - [ ] fuzz for panics and other bugs 41 | - [ ] garbage collection 42 | - [ ] selective subtree sync 43 | - [ ] splice operations? 
44 | - [ ] maybe edits aren't actually ord, figure out id system for edits so we can delete them as well? will allow us to have floats also 45 | - [ ] figure out true cost of all the tree deleting. can we speed up or defer the deletions when an old edit is inserted early in the oplist, or when object_assign deletes a large subtree? 46 | 47 | ## gc notes 48 | 49 | - we can also 'freeze' document state at a particular point in time. this is called a 'baseline'. some user-based crdt folks think this is unnecessary because user-edited data isn't large. but it'd be nice to have an entire database that is a crdt. this could be way too many edits. synchronizing entire histories isn't possible. 50 | - considering we can serialize the state of the document at a particular point in the operation tree, i think we can pass to a new client the document state as it existed at baseline T and only have to send edits that have occurred since T in the timeline. 51 | - however due to the asynchronous nature of offline mode, we can't ensure all clients have seen T. so what can we do? i'm comfortable with having a central server of some kind keeping track of all edits, not just the garbage collected ones since T occurred. perhaps it's possible this central server can operational-transform modify the edits to happen later? unfortunately this runs into the classic OT problem of `O(N^2)` performance, where `N` is the number of changes in each distinct site. the opset CRDT algorithm lets us insert characters very early in the algorithm and can merge in ~`O(N)`, assuming constant time operation processing. 52 | - perhaps we can update a site's baseline. the central server pays the `O(N)` cost to run the new edits up to baseline `T`, to form a new state `T'`. it then diffs `T` and `T'` and sends the diffs to any site that is using `T`, which applies the changes to their baseline. is diffing an expensive operation? can it be made cheaper? 
we could keep track of anything that was edited at all while running the new edits up to the baseline, consider those dirty, and just diff those? 53 | -------------------------------------------------------------------------------- /src/doc.rs: -------------------------------------------------------------------------------- 1 | use crate::json; 2 | use crate::opset; 3 | use std::cmp::Ordering; 4 | 5 | const CACHE_GAP: usize = 10; 6 | 7 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 8 | pub struct Id { 9 | pub num: usize, 10 | } 11 | 12 | pub const ROOT_ID: Id = Id { num: 0 }; 13 | 14 | #[derive(Clone, Debug, PartialEq, Eq)] 15 | pub struct DocOp { 16 | pub timestamp: u64, 17 | pub edits: Vec>, 18 | } 19 | impl PartialOrd for DocOp { 20 | fn partial_cmp(&self, other: &DocOp) -> Option { 21 | Some(self.cmp(other)) 22 | } 23 | } 24 | impl Ord for DocOp { 25 | fn cmp(&self, other: &DocOp) -> Ordering { 26 | // TODO fix this ordering 27 | self.timestamp.cmp(&other.timestamp) 28 | } 29 | } 30 | 31 | impl opset::Operation> for DocOp { 32 | fn apply(&self, tree: &mut json::Tree) { 33 | for edit in &self.edits { 34 | let _ = tree.update(edit); 35 | } 36 | } 37 | } 38 | 39 | pub struct Doc { 40 | opset: opset::Opset>, 41 | } 42 | 43 | impl Doc { 44 | pub fn new() -> Doc { 45 | Doc { 46 | opset: opset::Opset::new(json::Tree::new_with_object_root(ROOT_ID), CACHE_GAP), 47 | } 48 | } 49 | 50 | pub fn update(&mut self, op: DocOp) { 51 | self.opset.update(op); 52 | } 53 | 54 | pub fn update_from_iter>(&mut self, iter: I) { 55 | self.opset.update_from_iter(iter); 56 | } 57 | 58 | pub fn tree(&self) -> &json::Tree { 59 | self.opset.state() 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/json.rs: -------------------------------------------------------------------------------- 1 | mod sequence; 2 | #[cfg(test)] 3 | mod test; 4 | mod tree; 5 | mod value; 6 | 7 | pub use tree::{Edit, Tree, TreeError}; 8 | pub use 
value::{ArrayIndex, ArrayRef, ObjectRef, StringIndex, StringRef, Value}; 9 | -------------------------------------------------------------------------------- /src/json/sequence.rs: -------------------------------------------------------------------------------- 1 | use super::tree::{Node, NodeId, Tree, TreeError}; 2 | use std::fmt::Debug; 3 | use std::hash::Hash; 4 | 5 | const SPLIT_LEN: usize = 1024; 6 | 7 | /// `insert_fn(index to insert in contents at, node to insert into) -> length of inserted item` 8 | pub(super) fn insert) -> usize>( 9 | tree: &mut Tree, 10 | append_id: Id, 11 | character_id: Id, 12 | insert_fn: F, 13 | ) -> Result<(), TreeError> { 14 | if tree.id_to_node.contains_key(&character_id) { 15 | return Err(TreeError::DuplicateId); 16 | } 17 | let (node_id, string_index, id_list_index) = lookup_insertion_point(tree, &append_id)?; 18 | let insert_len = insert_fn(string_index, &mut tree.nodes[&node_id]); 19 | let ids = tree.nodes[&node_id].segment_ids_mut()?; 20 | // contents.insert(string_index, character); 21 | for (_, index_opt) in ids.iter_mut().skip(id_list_index) { 22 | if let Some(index) = index_opt { 23 | *index += insert_len; 24 | } 25 | } 26 | tree.id_to_node.insert(character_id.clone(), node_id); 27 | ids.insert(id_list_index, (character_id, Some(string_index))); 28 | consider_split(tree, node_id); 29 | Ok(()) 30 | } 31 | 32 | pub(super) fn delete) -> usize>( 33 | tree: &mut Tree, 34 | char_id: Id, 35 | delete_fn: F, 36 | ) -> Result<(), TreeError> { 37 | let (node_id, id_list_index) = lookup_id_index(tree, &char_id)?; 38 | if let Some(old_byte_index) = tree.nodes[&node_id].segment_ids_mut()?[id_list_index] 39 | .1 40 | .take() 41 | { 42 | let delete_len = delete_fn(old_byte_index, &mut tree.nodes[&node_id]); 43 | for (_, byte_idx) in tree.nodes[&node_id] 44 | .segment_ids_mut()? 
45 | .iter_mut() 46 | .skip(id_list_index) 47 | { 48 | if let Some(byte_idx) = byte_idx { 49 | *byte_idx -= delete_len; 50 | } 51 | } 52 | } 53 | Ok(()) 54 | } 55 | 56 | // Inserts a new, empty segment after `to_split`, and returns the usize of the new node. 57 | fn insert_segment( 58 | tree: &mut Tree, 59 | to_split: NodeId, 60 | id_split_index: usize, 61 | ) -> NodeId { 62 | let new_id = tree.next_id(); 63 | // split old node; insert into tree 64 | { 65 | let parent = tree.nodes[&to_split].parent; 66 | let mut node = Node { 67 | parent: parent, 68 | data: tree.nodes[&to_split].segment_create(), 69 | }; 70 | let contents_len = tree.nodes[&to_split].segment_contents_len().unwrap(); 71 | let split_start_string = tree.nodes[&to_split] 72 | .segment_ids_mut() 73 | .unwrap() 74 | .iter() 75 | .skip(id_split_index) 76 | .find_map(|(_, byte_idx)| byte_idx.clone()) 77 | .unwrap_or(contents_len); 78 | let new_ids: Vec<(Id, Option)> = tree.nodes[&to_split] 79 | .segment_ids_mut() 80 | .unwrap() 81 | .split_off(id_split_index) 82 | .into_iter() 83 | .map(|(id, n)| (id, n.map(|n| n - split_start_string))) 84 | .collect(); 85 | tree.nodes[&to_split].segment_split_contents_into(&mut node, split_start_string); 86 | for (id, _) in &new_ids { 87 | tree.id_to_node[id] = new_id; 88 | } 89 | *node.segment_ids_mut().unwrap() = new_ids; 90 | tree.nodes.insert(new_id, node); 91 | } 92 | 93 | // adjust to_split, which is the segment before new_id 94 | let old_to_split_next = { 95 | let (_, next) = tree.nodes[&to_split].segment_adjacencies_mut(); 96 | let old = *next; 97 | *next = new_id; 98 | old 99 | }; 100 | 101 | // adjust the new node 102 | { 103 | let (prev, next) = tree.nodes[&new_id].segment_adjacencies_mut(); 104 | *prev = to_split; 105 | *next = old_to_split_next; 106 | } 107 | 108 | // adjust the node after `to_split` 109 | { 110 | let (prev, _) = tree.nodes[&old_to_split_next].segment_adjacencies_mut(); 111 | *prev = new_id; 112 | } 113 | 114 | new_id 115 | } 116 | 117 | fn 
lookup_id_index( 118 | tree: &Tree, 119 | lookup_id: &Id, 120 | ) -> Result<(NodeId, usize), TreeError> { 121 | let node_id = tree.id_to_node(&lookup_id)?; 122 | let node = tree 123 | .nodes 124 | .get(&node_id) 125 | .expect("node_id listed in id_to_node did not exist."); 126 | 127 | let ids = node.segment_ids()?; 128 | 129 | for (i, (id, _)) in ids.iter().enumerate() { 130 | if id == lookup_id { 131 | return Ok((node_id, i)); 132 | // don't check for string index until next iteration of loop; we want the *next* 133 | // char index to be the insertion point, not this one 134 | } 135 | } 136 | panic!("couldn't find id in list"); 137 | } 138 | 139 | /// From a character id, looks up the `(containing segment id, character index, id list index)` 140 | /// that an appended character would need to be inserted at 141 | fn lookup_insertion_point( 142 | tree: &Tree, 143 | lookup_id: &Id, 144 | ) -> Result<(NodeId, usize, usize), TreeError> { 145 | let node_id = tree.id_to_node(&lookup_id)?; 146 | let node = tree 147 | .nodes 148 | .get(&node_id) 149 | .expect("node_id listed in id_to_node did not exist."); 150 | if node.segment_is_container() { 151 | let (_, start) = node.segment_adjacencies(); 152 | return Ok((*start, 0, 0)); 153 | } 154 | let ids = node.segment_ids()?; 155 | 156 | let mut id_list_index_opt = None; 157 | for (i, (id, string_index_opt)) in ids.iter().enumerate() { 158 | if let Some(id_list_index) = id_list_index_opt { 159 | if let Some(string_index) = string_index_opt { 160 | return Ok((node_id, *string_index, id_list_index)); 161 | } 162 | } 163 | if id == lookup_id { 164 | id_list_index_opt = Some(i + 1); 165 | // don't check for string index until next iteration of loop; we want the *next* 166 | // char index to be the insertion point, not this one 167 | } 168 | } 169 | if let Some(id_list_index) = id_list_index_opt { 170 | return Ok((node_id, node.segment_contents_len()?, id_list_index)); 171 | } 172 | panic!("id not found in segment id list"); 173 | } 
174 | 175 | /// If `segment` is greater than `SPLIT_LEN`, we'll split it into two pieces. This recurses on 176 | /// the children, further splitting them if they're still too long. Returns the leftmost and 177 | /// rightmost of the new segments; if no split occured, these will both still be `segment`. 178 | // TODO this could probably be sped up to instantly segment a very long node into `n` children. 179 | fn consider_split( 180 | tree: &mut Tree, 181 | segment: NodeId, 182 | ) -> (NodeId, NodeId) { 183 | if tree.nodes[&segment].segment_is_container() { 184 | // abort if this is off the edge of a string 185 | return (segment, segment); 186 | } 187 | let ids = tree.nodes[&segment].segment_ids_mut().unwrap(); 188 | if ids.len() <= SPLIT_LEN { 189 | return (segment, segment); 190 | } 191 | // the first index of the second segment. need to do this stuff to make sure we split 192 | // along a codepoint boundary 193 | let split_start_vec = ids.len() / 2; 194 | let new_node_id = insert_segment(tree, segment, split_start_vec); 195 | let (left, _) = consider_split(tree, segment); 196 | let (_, right) = consider_split(tree, new_node_id); 197 | (left, right) 198 | } 199 | -------------------------------------------------------------------------------- /src/json/test.rs: -------------------------------------------------------------------------------- 1 | use super::tree::*; 2 | use super::value::{self, Value}; 3 | #[derive(Clone, PartialEq, Eq, Hash, Debug)] 4 | struct MyId(usize); 5 | 6 | fn vals_to_nums(vals: Vec>) -> Vec { 7 | vals.iter() 8 | .map(|val| match val { 9 | Value::Int(i) => *i, 10 | _ => panic!("unexpected type in list"), 11 | }) 12 | .collect() 13 | } 14 | 15 | #[test] 16 | fn object_assignment() { 17 | let mut tree = Tree::new_with_object_root(MyId(0)); 18 | 19 | // {} 20 | // ^ 21 | // 0 22 | assert_eq!( 23 | Ok(value::Parent::None), 24 | value::ObjectRef(MyId(0)).parent(&tree) 25 | ); 26 | 27 | tree.update(&Edit::MapCreate { 28 | id: 
value::ObjectRef(MyId(1)), 29 | }) 30 | .unwrap(); 31 | tree.update(&Edit::MapInsert { 32 | parent: value::ObjectRef(MyId(0)), 33 | key: "my key".to_string(), 34 | item: Value::Object(value::ObjectRef(MyId(1))), 35 | }) 36 | .unwrap(); 37 | 38 | tree.update(&Edit::TextCreate { 39 | id: value::StringRef(MyId(2)), 40 | }) 41 | .unwrap(); 42 | tree.update(&Edit::MapInsert { 43 | parent: value::ObjectRef(MyId(1)), 44 | key: "my key 2".to_string(), 45 | item: Value::Object(value::ObjectRef(MyId(2))), 46 | }) 47 | .unwrap(); 48 | 49 | tree.insert_character(MyId(2), MyId(3), 'a').unwrap(); 50 | 51 | tree.delete_orphans(); 52 | 53 | // {"my key": {"my key 2": "a"}} 54 | // ^ ^ ^^ 55 | // 0 1 23 56 | assert_eq!(Ok(NodeType::Object), tree.get_type(MyId(0))); 57 | assert_eq!(Ok(NodeType::Object), tree.get_type(MyId(1))); 58 | assert_eq!(Ok(NodeType::String), tree.get_type(MyId(2))); 59 | assert_eq!(Ok(NodeType::Character), tree.get_type(MyId(3))); 60 | assert_eq!( 61 | Ok(value::Parent::Object(value::ObjectRef(MyId(0)))), 62 | value::ObjectRef(MyId(1)).parent(&tree) 63 | ); 64 | assert_eq!( 65 | Ok(value::Parent::Object(value::ObjectRef(MyId(1)))), 66 | value::ObjectRef(MyId(2)).parent(&tree) 67 | ); 68 | assert_eq!( 69 | Ok(value::StringRef(MyId(2))), 70 | value::StringIndex(MyId(3)).parent(&tree) 71 | ); 72 | assert_eq!( 73 | Ok(Value::Object(value::ObjectRef(MyId(1)))), 74 | value::ObjectRef(MyId(0)).get(&tree, "my key") 75 | ); 76 | assert_eq!( 77 | Ok(Value::String(value::StringRef(MyId(2)))), 78 | value::ObjectRef(MyId(1)).get(&tree, "my key 2") 79 | ); 80 | assert_eq!( 81 | Ok(Value::Unset), 82 | value::ObjectRef(MyId(0)).get(&tree, "my key 2") 83 | ); 84 | 85 | tree.update(&Edit::MapInsert { 86 | parent: value::ObjectRef(MyId(0)), 87 | key: "my key".to_string(), 88 | item: Value::True, 89 | }) 90 | .unwrap(); 91 | tree.delete_orphans(); // {"my key": true} 92 | // ^ ^ 93 | // 0 4 94 | assert_eq!(Ok(NodeType::Object), tree.get_type(MyId(0))); 95 | 
assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(1))); 96 | assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(2))); 97 | assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(3))); 98 | assert_eq!( 99 | Ok(value::Parent::None), 100 | value::ObjectRef(MyId(0)).parent(&tree) 101 | ); 102 | assert_eq!( 103 | Err(TreeError::UnknownId), 104 | value::ObjectRef(MyId(1)).parent(&tree) 105 | ); 106 | assert_eq!( 107 | Err(TreeError::UnknownId), 108 | value::ObjectRef(MyId(2)).parent(&tree) 109 | ); 110 | assert_eq!( 111 | Err(TreeError::UnknownId), 112 | value::StringIndex(MyId(3)).parent(&tree) 113 | ); 114 | 115 | assert_eq!( 116 | Ok(Value::True), 117 | value::ObjectRef(MyId(0)).get(&tree, "my key") 118 | ); 119 | 120 | tree.update(&Edit::MapInsert { 121 | parent: value::ObjectRef(MyId(0)), 122 | key: "my key".to_string(), 123 | item: Value::Unset, 124 | }) 125 | .unwrap(); 126 | tree.delete_orphans(); // {} 127 | // ^ 128 | // 0 129 | assert_eq!(Ok(NodeType::Object), tree.get_type(MyId(0))); 130 | assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(1))); 131 | assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(2))); 132 | assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(3))); 133 | assert_eq!(Err(TreeError::UnknownId), tree.get_type(MyId(4))); 134 | assert_eq!( 135 | Ok(Value::Unset), 136 | value::ObjectRef(MyId(0)).get(&tree, "my key") 137 | ); 138 | } 139 | 140 | #[test] 141 | fn invalid_ids_error() { 142 | let mut tree = Tree::new_with_string_root(MyId(0)); 143 | assert_eq!(tree.insert_character(MyId(0), MyId(1), 'a'), Ok(())); 144 | assert_eq!( 145 | tree.insert_character(MyId(0), MyId(1), 'a'), 146 | Err(TreeError::DuplicateId) 147 | ); 148 | assert_eq!( 149 | tree.insert_character(MyId(1), MyId(0), 'a'), 150 | Err(TreeError::DuplicateId) 151 | ); 152 | assert_eq!( 153 | tree.insert_character(MyId(2), MyId(5), 'a'), 154 | Err(TreeError::UnknownId) 155 | ); 156 | assert_eq!(tree.delete_character(MyId(2)), 
Err(TreeError::UnknownId)); 157 | assert_eq!( 158 | tree.delete_character(MyId(0)), 159 | Err(TreeError::UnexpectedNodeType) 160 | ); 161 | } 162 | 163 | #[test] 164 | fn simple_delete() { 165 | let mut tree = Tree::new_with_string_root(MyId(0)); 166 | tree.insert_character(MyId(0), MyId(1), 'a').unwrap(); 167 | assert_eq!( 168 | value::StringRef(MyId(0)).to_string(&tree), 169 | Ok("a".to_string()) 170 | ); 171 | tree.insert_character(MyId(1), MyId(2), 'b').unwrap(); 172 | assert_eq!( 173 | value::StringRef(MyId(0)).to_string(&tree), 174 | Ok("ab".to_string()) 175 | ); 176 | tree.delete_character(MyId(1)).unwrap(); 177 | assert_eq!( 178 | value::StringRef(MyId(0)).to_string(&tree), 179 | Ok("b".to_string()) 180 | ); 181 | // test delete same char; should be noop 182 | tree.delete_character(MyId(1)).unwrap(); 183 | assert_eq!( 184 | value::StringRef(MyId(0)).to_string(&tree), 185 | Ok("b".to_string()) 186 | ); 187 | tree.delete_character(MyId(2)).unwrap(); 188 | assert_eq!( 189 | value::StringRef(MyId(0)).to_string(&tree), 190 | Ok("".to_string()) 191 | ); 192 | } 193 | 194 | #[test] 195 | fn insert_and_delete_characters() { 196 | fn num_to_char(i: usize) -> char { 197 | match i % 5 { 198 | 0 => '0', 199 | 1 => '1', 200 | 2 => '2', 201 | 3 => '3', 202 | _ => '4', 203 | } 204 | } 205 | 206 | let mut tree = Tree::new_with_string_root(MyId(0)); 207 | tree.insert_character(MyId(0), MyId(1), 'a').unwrap(); 208 | assert_eq!( 209 | value::StringRef(MyId(0)).to_string(&tree), 210 | Ok("a".to_string()) 211 | ); 212 | tree.insert_character(MyId(1), MyId(2), 'b').unwrap(); 213 | assert_eq!( 214 | value::StringRef(MyId(0)).to_string(&tree), 215 | Ok("ab".to_string()) 216 | ); 217 | tree.insert_character(MyId(1), MyId(3), 'c').unwrap(); 218 | assert_eq!( 219 | value::StringRef(MyId(0)).to_string(&tree), 220 | Ok("acb".to_string()) 221 | ); 222 | tree.insert_character(MyId(0), MyId(4), 'd').unwrap(); 223 | assert_eq!( 224 | value::StringRef(MyId(0)).to_string(&tree), 225 | 
Ok("dacb".to_string()) 226 | ); 227 | for i in 5..10000 { 228 | tree.insert_character(MyId(i - 1), MyId(i), num_to_char(i)) 229 | .unwrap(); 230 | } 231 | 232 | let long_insert = (5..10000).map(|i| num_to_char(i)).collect::(); 233 | assert_eq!( 234 | value::StringRef(MyId(0)).to_string(&tree), 235 | Ok(format!("d{}acb", long_insert)) 236 | ); 237 | 238 | for i in 5..10000 { 239 | tree.delete_character(MyId(i)).unwrap(); 240 | } 241 | 242 | assert_eq!( 243 | value::StringRef(MyId(0)).to_string(&tree), 244 | Ok(format!("dacb")) 245 | ); 246 | } 247 | 248 | #[test] 249 | fn insert_and_delete_list_of_nums() { 250 | fn num_to_value(i: usize) -> Value { 251 | Value::Int(i as i64) 252 | } 253 | 254 | let mut tree = Tree::new_with_array_root(MyId(0)); 255 | tree.insert_list_item(MyId(0), MyId(1), Value::Int(1)) 256 | .unwrap(); 257 | assert_eq!( 258 | value::ArrayRef(MyId(0)).to_vec(&tree).map(vals_to_nums), 259 | Ok(vec![1]) 260 | ); 261 | tree.insert_list_item(MyId(1), MyId(2), Value::Int(2)) 262 | .unwrap(); 263 | assert_eq!( 264 | value::ArrayRef(MyId(0)).to_vec(&tree).map(vals_to_nums), 265 | Ok(vec![1, 2]) 266 | ); 267 | tree.insert_list_item(MyId(1), MyId(3), Value::Int(3)) 268 | .unwrap(); 269 | assert_eq!( 270 | value::ArrayRef(MyId(0)).to_vec(&tree).map(vals_to_nums), 271 | Ok(vec![1, 3, 2]) 272 | ); 273 | tree.insert_list_item(MyId(0), MyId(4), Value::Int(4)) 274 | .unwrap(); 275 | assert_eq!( 276 | value::ArrayRef(MyId(0)).to_vec(&tree).map(vals_to_nums), 277 | Ok(vec![4, 1, 3, 2]) 278 | ); 279 | for i in 5..10000 { 280 | tree.insert_list_item(MyId(i - 1), MyId(i), num_to_value(i)) 281 | .unwrap(); 282 | } 283 | 284 | let mut long_insert = (5..10000).map(|i| i).collect::>(); 285 | long_insert.insert(0, 4); 286 | long_insert.push(1); 287 | long_insert.push(3); 288 | long_insert.push(2); 289 | assert_eq!( 290 | value::ArrayRef(MyId(0)).to_vec(&tree).map(vals_to_nums), 291 | Ok(long_insert) 292 | ); 293 | 294 | for i in 5..10000 { 295 | 
tree.delete_list_item(MyId(i)).unwrap(); 296 | } 297 | 298 | assert_eq!( 299 | value::ArrayRef(MyId(0)).to_vec(&tree).map(vals_to_nums), 300 | Ok(vec![4, 1, 3, 2]) 301 | ); 302 | } 303 | 304 | #[test] 305 | fn cant_move_things_with_object_parents() { 306 | let mut tree = Tree::new_with_object_root(MyId(0)); 307 | tree.update(&Edit::MapCreate { 308 | id: value::ObjectRef(MyId(1)), 309 | }) 310 | .unwrap(); 311 | tree.update(&Edit::MapInsert { 312 | parent: value::ObjectRef(MyId(0)), 313 | key: "my key".to_string(), 314 | item: Value::Object(value::ObjectRef(MyId(1))), 315 | }) 316 | .unwrap(); 317 | // attempt second assignment 318 | assert_eq!( 319 | Err(TreeError::NodeAlreadyHadParent), 320 | tree.update(&Edit::MapInsert { 321 | parent: value::ObjectRef(MyId(0)), 322 | key: "my key 2".to_string(), 323 | item: Value::Object(value::ObjectRef(MyId(1))) 324 | }) 325 | ); 326 | } 327 | 328 | #[test] 329 | fn cant_move_things_with_array_parents() { 330 | let mut tree = Tree::new_with_array_root(MyId(0)); 331 | tree.update(&Edit::MapCreate { 332 | id: value::ObjectRef(MyId(1)), 333 | }) 334 | .unwrap(); 335 | tree.insert_list_item(MyId(0), MyId(2), Value::Object(value::ObjectRef(MyId(1)))) 336 | .unwrap(); 337 | // attempt second insert 338 | assert_eq!( 339 | Err(TreeError::NodeAlreadyHadParent), 340 | tree.insert_list_item(MyId(0), MyId(3), Value::Object(value::ObjectRef(MyId(1)))) 341 | ); 342 | } 343 | 344 | #[test] 345 | fn object_assignment_prevents_cycles() { 346 | let mut tree = Tree::new_with_object_root(MyId(0)); 347 | 348 | // {} 349 | // ^ 350 | // 0 351 | assert_eq!(Ok(NodeType::Object), tree.get_type(MyId(0))); 352 | assert_eq!( 353 | Ok(value::Parent::None), 354 | value::ObjectRef(MyId(0)).parent(&tree) 355 | ); 356 | 357 | tree.update(&Edit::MapCreate { 358 | id: value::ObjectRef(MyId(1)), 359 | }) 360 | .unwrap(); 361 | tree.update(&Edit::MapInsert { 362 | parent: value::ObjectRef(MyId(0)), 363 | key: "my key".to_string(), 364 | item: 
Value::Object(value::ObjectRef(MyId(1))), 365 | }) 366 | .unwrap(); 367 | 368 | tree.update(&Edit::MapCreate { 369 | id: value::ObjectRef(MyId(2)), 370 | }) 371 | .unwrap(); 372 | tree.update(&Edit::MapInsert { 373 | parent: value::ObjectRef(MyId(1)), 374 | key: "my key 2".to_string(), 375 | item: Value::Object(value::ObjectRef(MyId(2))), 376 | }) 377 | .unwrap(); 378 | 379 | // {"my key": {"my key 2": {}}} 380 | // ^ ^ ^ 381 | // 0 1 2 382 | 383 | // let's attempt an assignment that causes a loop 384 | 385 | // first, unassign 1 from 0["my_key"] 386 | tree.update(&Edit::MapInsert { 387 | parent: value::ObjectRef(MyId(0)), 388 | key: "my key".to_string(), 389 | item: Value::Int(123), 390 | }) 391 | .unwrap(); // next, make the now-orphaned 1 a child of 2 assert_eq!(Err(TreeError::EditWouldCauseCycle), 392 | 393 | assert_eq!( 394 | Err(TreeError::EditWouldCauseCycle), 395 | tree.update(&Edit::MapInsert { 396 | parent: value::ObjectRef(MyId(2)), 397 | key: "my key 3".to_string(), 398 | item: Value::Object(value::ObjectRef(MyId(1))) 399 | }) 400 | ); 401 | } 402 | 403 | #[test] 404 | fn adjacent_string_index() { 405 | let mut tree = Tree::new_with_string_root(MyId(0)); 406 | for i in 0..5000 { 407 | tree.update(&Edit::TextInsert { 408 | index: value::StringIndex(MyId(i)), 409 | id: value::StringIndex(MyId(i + 10_000)), 410 | character: 'b', 411 | }) 412 | .unwrap(); 413 | tree.update(&Edit::TextDelete { 414 | id: value::StringIndex(MyId(i + 10_000)), 415 | }) 416 | .unwrap(); 417 | tree.update(&Edit::TextInsert { 418 | index: value::StringIndex(MyId(i + 10_000)), 419 | id: value::StringIndex(MyId(i + 1)), 420 | character: 'a', 421 | }) 422 | .unwrap(); 423 | } 424 | 425 | for i in 0..5001 { 426 | let next_id = value::StringIndex(MyId(i)).adjacent(&tree, 1).unwrap(); 427 | if i == 5000 { 428 | assert_eq!(next_id, value::StringIndex(MyId(5000))); 429 | } else { 430 | assert_eq!(next_id, value::StringIndex(MyId(i + 1))); 431 | } 432 | } 433 | 434 | for i in 0..5001 { 435 
| let prev_id = value::StringIndex(MyId(i)).adjacent(&tree, -1).unwrap(); 436 | if i == 0 { 437 | assert_eq!(prev_id, value::StringIndex(MyId(0))); 438 | } else { 439 | assert_eq!(prev_id, value::StringIndex(MyId(i - 1))); 440 | } 441 | } 442 | 443 | for i in 0..5001 { 444 | let next_id = value::StringIndex(MyId(i)).adjacent(&tree, 2).unwrap(); 445 | if i == 5000 || i == 4999 { 446 | assert_eq!(next_id, value::StringIndex(MyId(5000))); 447 | } else { 448 | assert_eq!(next_id, value::StringIndex(MyId(i + 2))); 449 | } 450 | } 451 | 452 | for i in 0..5001 { 453 | let prev_id = value::StringIndex(MyId(i)).adjacent(&tree, -2).unwrap(); 454 | if i == 0 || i == 1 { 455 | assert_eq!(prev_id, value::StringIndex(MyId(0))); 456 | } else { 457 | assert_eq!(prev_id, value::StringIndex(MyId(i - 2))); 458 | } 459 | } 460 | } 461 | -------------------------------------------------------------------------------- /src/json/tree.rs: -------------------------------------------------------------------------------- 1 | use super::sequence; 2 | use super::value::{self, Value}; 3 | use im::{HashMap, HashSet}; 4 | use std::fmt::Debug; 5 | use std::hash::Hash; 6 | 7 | #[derive(Clone, Debug, PartialEq, Eq)] 8 | pub(super) enum Child { 9 | True, 10 | False, 11 | Int(i64), 12 | Null, 13 | Collection(NodeId), 14 | } 15 | 16 | #[derive(Clone, Debug, PartialEq, Eq)] 17 | pub enum Edit { 18 | ArrayCreate { 19 | /// id of new list 20 | id: value::ArrayRef, 21 | }, 22 | ArrayInsert { 23 | /// Position to insert at. 24 | index: value::ArrayIndex, 25 | /// Insertion id. This is used for deleting list items, and in other `ArrayInsert`'s `prev`. 26 | id: value::ArrayIndex, 27 | /// Item to be inserted. If this item had a prevous parent, it is removed from that parent. 
28 | item: Value, 29 | }, 30 | ArrayDelete { 31 | /// Id of index to delete 32 | id: value::ArrayIndex, 33 | }, 34 | MapCreate { 35 | /// id of new map 36 | id: value::ObjectRef, 37 | }, 38 | MapInsert { 39 | /// Id of parent map 40 | parent: value::ObjectRef, 41 | /// Key of item in hashmap 42 | key: String, 43 | /// Item to be set. If this item had a prevous parent, it is removed from that parent. 44 | item: Value, 45 | }, 46 | TextCreate { 47 | /// id of new text 48 | id: value::StringRef, 49 | }, 50 | TextInsert { 51 | /// Position to insert at. 52 | index: value::StringIndex, 53 | /// Id of newly created character 54 | id: value::StringIndex, 55 | /// Actual new character value 56 | character: char, 57 | }, 58 | TextDelete { 59 | /// Id of character to delete 60 | id: value::StringIndex, 61 | }, 62 | } 63 | 64 | #[derive(Clone, Debug, PartialEq, Eq)] 65 | pub enum NodeType { 66 | String, 67 | Character, 68 | Object, 69 | Array, 70 | ArrayEntry, 71 | } 72 | 73 | #[derive(Clone, Debug, PartialEq, Eq)] 74 | pub enum TreeError { 75 | UnknownId, 76 | UnexpectedNodeType, 77 | DuplicateId, 78 | NodeAlreadyHadParent, 79 | EditWouldCauseCycle, 80 | } 81 | 82 | #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] 83 | pub(super) struct NodeId(usize); 84 | 85 | /// This struct is left public for others who would like to build their own CRDT library or have a 86 | /// custom setup of some kind. Most crudite users will not need to use this. 87 | /// 88 | /// A JSON-compatible document where each character and value in the document has a unique ID, and 89 | /// deletions in arrays and strings maintain tombstones for ordering future insertions. All methods 90 | /// on this tree should be `O(log n)` or better unless otherwise noted. The tree also internally 91 | /// uses persistent data structures, so cloning should be a very fast and efficient operation. 
92 | /// 93 | /// Sequences like arrays and strings in `Tree` are represented by a persistent double linked list 94 | /// of segments. This is sorta like just the leaves of a rope connected by a doubly linked list. 95 | /// Why not use a rope? Ropes are useful for calculating "what character is at position n" very 96 | /// efficiently. However, it's tricky to make ropes work with random access via IDs, and there is 97 | /// overhead for calculating the rope. We opt instead to make indexed access `O(n)` and ID-based 98 | /// access `O(1)`. 99 | #[derive(Clone, Debug)] 100 | pub struct Tree { 101 | /// Number to use for the next node that is created. 102 | next_node: NodeId, 103 | 104 | /// Id of the root object of the tree 105 | root: Id, 106 | 107 | orphans: HashSet, 108 | 109 | /// Maps external IDs to their position in the tree. In the case of Segments of a sequence, 110 | /// futher disambiguation may be necessary to find the exact character this represents within 111 | /// the string. 112 | pub(super) id_to_node: HashMap, 113 | 114 | /// Maps node ids to node data. 115 | pub(super) nodes: HashMap>, 116 | } 117 | 118 | #[derive(Clone, Debug)] 119 | pub(super) struct Node { 120 | pub(super) data: NodeData, 121 | pub(super) parent: Option, 122 | } 123 | 124 | #[derive(Clone, Debug)] 125 | pub(super) enum NodeData { 126 | Object { 127 | items: HashMap, 128 | id: Id, 129 | }, 130 | /// Represents a JSON array value. 131 | Array { 132 | /// The first `ArraySegment` in the string value. May be equal to `end` if there is only one 133 | /// segment. 134 | start: NodeId, 135 | /// The last `ArraySegment` in the string value. May be equal to `start` if there is only 136 | /// one segment. 137 | end: NodeId, 138 | id: Id, 139 | }, 140 | /// Represents a range of a JSON array value. 141 | ArraySegment { 142 | /// Node index of the previous `ArraySegment` in this string. If this is the first segment 143 | /// in the string, refers to the `Text` parent. 
144 | prev: NodeId, 145 | /// Node index of the next `ArraySegment` in this string. If this is the last segment 146 | /// in the string, refers to the `Text` parent. 147 | next: NodeId, 148 | /// Children in this segment. 149 | contents: Vec, 150 | /// List of ids. If they are a tombstone, the the Option will be None, if they represent a 151 | /// live character, the Option will show the index of the character. 152 | ids: Vec<(Id, Option)>, 153 | }, 154 | /// Represents a JSON string value. 155 | String { 156 | /// The first `StringSegment` in the string value. May be equal to `end` if there is only one 157 | /// segment. 158 | start: NodeId, 159 | /// The last `StringSegment` in the string value. May be equal to `start` if there is only 160 | /// one segment. 161 | end: NodeId, 162 | id: Id, 163 | }, 164 | /// Represents a range of a JSON string value. 165 | StringSegment { 166 | /// Node index of the previous `StringSegment` in this string. If this is the first segment 167 | /// in the string, refers to the `Text` parent. 168 | prev: NodeId, 169 | /// Node index of the next `StringSegment` in this string. If this is the last segment 170 | /// in the string, refers to the `Text` parent. 171 | next: NodeId, 172 | /// String contents of this segment. 173 | contents: String, 174 | /// List of ids. If they are a tombstone, the the Option will be None, if they represent a 175 | /// live character, the Option will show the index of the character. 176 | ids: Vec<(Id, Option)>, 177 | }, 178 | } 179 | 180 | impl Node { 181 | fn id(&self) -> Option { 182 | match &self.data { 183 | NodeData::Object { id, .. } => Some(id.clone()), 184 | NodeData::String { id, .. } => Some(id.clone()), 185 | NodeData::StringSegment { .. } => None, 186 | NodeData::Array { id, .. } => Some(id.clone()), 187 | NodeData::ArraySegment { .. } => None, 188 | } 189 | } 190 | 191 | /// Creates a new, empty NodeData for a segment with the same kind. 
`prev` and `next` are 192 | /// expected to be overwritten by the calling function. 193 | pub(super) fn segment_create(&self) -> NodeData { 194 | match &self.data { 195 | NodeData::StringSegment { prev, next, .. } => NodeData::StringSegment { 196 | prev: *prev, 197 | next: *next, 198 | contents: String::new(), 199 | ids: Vec::new(), 200 | }, 201 | NodeData::String { end, start, .. } => NodeData::StringSegment { 202 | prev: *end, 203 | next: *start, 204 | contents: String::new(), 205 | ids: Vec::new(), 206 | }, 207 | NodeData::ArraySegment { prev, next, .. } => NodeData::ArraySegment { 208 | prev: *prev, 209 | next: *next, 210 | contents: Vec::new(), 211 | ids: Vec::new(), 212 | }, 213 | NodeData::Array { end, start, .. } => NodeData::ArraySegment { 214 | prev: *end, 215 | next: *start, 216 | contents: Vec::new(), 217 | ids: Vec::new(), 218 | }, 219 | 220 | _ => panic!("segment_create called on non-sequence node"), 221 | } 222 | } 223 | 224 | /// Returns (prev, next) for segments, and (end, start) for sequence containers 225 | pub(super) fn segment_adjacencies(&self) -> (&NodeId, &NodeId) { 226 | match &self.data { 227 | NodeData::String { end, start, .. } => (end, start), 228 | NodeData::StringSegment { prev, next, .. } => (prev, next), 229 | NodeData::Array { end, start, .. } => (end, start), 230 | NodeData::ArraySegment { prev, next, .. } => (prev, next), 231 | _ => panic!("segment_adjacencies called on non-sequence typed node"), 232 | } 233 | } 234 | 235 | /// Returns (prev, next) for segments, and (end, start) for sequence containers 236 | pub(super) fn segment_adjacencies_mut(&mut self) -> (&mut NodeId, &mut NodeId) { 237 | match &mut self.data { 238 | NodeData::String { end, start, .. } => (end, start), 239 | NodeData::StringSegment { prev, next, .. } => (prev, next), 240 | NodeData::Array { end, start, .. } => (end, start), 241 | NodeData::ArraySegment { prev, next, .. 
} => (prev, next), 242 | _ => panic!("segment_adjacencies called on non-sequence typed node"), 243 | } 244 | } 245 | 246 | pub(super) fn segment_ids(&self) -> Result<&Vec<(Id, Option)>, TreeError> { 247 | match &self.data { 248 | NodeData::StringSegment { ids, .. } => Ok(ids), 249 | NodeData::ArraySegment { ids, .. } => Ok(ids), 250 | _ => Err(TreeError::UnexpectedNodeType), 251 | } 252 | } 253 | 254 | pub(super) fn segment_ids_mut(&mut self) -> Result<&mut Vec<(Id, Option)>, TreeError> { 255 | match &mut self.data { 256 | NodeData::StringSegment { ids, .. } => Ok(ids), 257 | NodeData::ArraySegment { ids, .. } => Ok(ids), 258 | _ => Err(TreeError::UnexpectedNodeType), 259 | } 260 | } 261 | 262 | pub(super) fn segment_contents_len(&self) -> Result { 263 | match &self.data { 264 | NodeData::StringSegment { contents, .. } => Ok(contents.len()), 265 | NodeData::ArraySegment { contents, .. } => Ok(contents.len()), 266 | _ => Err(TreeError::UnexpectedNodeType), 267 | } 268 | } 269 | 270 | pub(super) fn segment_is_container(&self) -> bool { 271 | match &self.data { 272 | NodeData::String { .. } => true, 273 | NodeData::Array { .. } => true, 274 | _ => false, 275 | } 276 | } 277 | 278 | pub(super) fn segment_split_contents_into(&mut self, other: &mut Node, split_index: usize) { 279 | match (&mut self.data, &mut other.data) { 280 | ( 281 | NodeData::StringSegment { 282 | contents: self_contents, 283 | .. 284 | }, 285 | NodeData::StringSegment { 286 | contents: other_contents, 287 | .. 288 | }, 289 | ) => { 290 | if other_contents.len() != 0 { 291 | panic!("split_contents_into's `other` did not have empty contents"); 292 | } 293 | let new_string = self_contents.split_off(split_index); 294 | *other_contents = new_string; 295 | } 296 | ( 297 | NodeData::ArraySegment { 298 | contents: self_contents, 299 | .. 300 | }, 301 | NodeData::ArraySegment { 302 | contents: other_contents, 303 | .. 
304 | }, 305 | ) => { 306 | if other_contents.len() != 0 { 307 | panic!("split_contents_into's `other` did not have empty contents"); 308 | } 309 | let new_vec = self_contents.split_off(split_index); 310 | *other_contents = new_vec; 311 | } 312 | 313 | _ => panic!("two node types in split_contents_into did not match or were not segments"), 314 | } 315 | } 316 | } 317 | 318 | impl Tree { 319 | /// This is private since it constructs a tree with no root value; use one of the public 320 | /// constructors to create the `Tree` instead. 321 | fn new(root_id: Id) -> Self { 322 | Tree { 323 | orphans: HashSet::new(), 324 | next_node: NodeId(0), 325 | id_to_node: HashMap::new(), 326 | nodes: HashMap::new(), 327 | root: root_id, 328 | } 329 | } 330 | 331 | pub fn update(&mut self, edit: &Edit) -> Result<(), TreeError> { 332 | match edit { 333 | Edit::ArrayCreate { id } => self.construct_array(id.0.clone()), 334 | Edit::ArrayInsert { index, id, item } => { 335 | self.insert_list_item(index.0.clone(), id.0.clone(), item.clone()) 336 | } 337 | Edit::ArrayDelete { id } => self.delete_list_item(id.0.clone()).map(|_| ()), 338 | Edit::MapCreate { id } => self.construct_object(id.0.clone()), 339 | Edit::MapInsert { parent, key, item } => self 340 | .object_assign(parent.0.clone(), key.clone(), item.clone()) 341 | .map(|_| ()), 342 | Edit::TextCreate { id } => self.construct_string(id.0.clone()), 343 | Edit::TextInsert { 344 | index, 345 | id, 346 | character, 347 | } => self.insert_character(index.0.clone(), id.0.clone(), *character), 348 | Edit::TextDelete { id } => self.delete_character(id.0.clone()), 349 | } 350 | } 351 | 352 | /// Creates a new `Tree` representing an empty string. 353 | pub fn new_with_string_root(root_id: Id) -> Self { 354 | let mut tree = Self::new(root_id.clone()); 355 | tree.construct_string(root_id).unwrap(); 356 | tree.orphans = HashSet::new(); 357 | tree 358 | } 359 | 360 | /// Creates a new `Tree` representing an empty object. 
361 | pub fn new_with_object_root(root_id: Id) -> Self { 362 | let mut tree = Self::new(root_id.clone()); 363 | tree.construct_object(root_id).unwrap(); 364 | tree.orphans = HashSet::new(); 365 | tree 366 | } 367 | 368 | /// Creates a new `Tree` representing an empty array. 369 | pub fn new_with_array_root(root_id: Id) -> Self { 370 | let mut tree = Self::new(root_id.clone()); 371 | tree.construct_array(root_id).unwrap(); 372 | tree.orphans = HashSet::new(); 373 | tree 374 | } 375 | 376 | fn construct_simple(&mut self, id: Id, node_data: NodeData) -> Result { 377 | if self.id_to_node.contains_key(&id) { 378 | return Err(TreeError::DuplicateId); 379 | } 380 | let node_id = self.next_id(); 381 | self.id_to_node.insert(id, node_id); 382 | self.orphans.insert(node_id); 383 | self.nodes.insert( 384 | node_id, 385 | Node { 386 | parent: None, 387 | data: node_data, 388 | }, 389 | ); 390 | Ok(node_id) 391 | } 392 | 393 | /// Constructs a new empty object within the `Tree`. Newly constructed values have no parent or 394 | /// place in the tree until placed with an `assign` call. 395 | pub(super) fn construct_object(&mut self, id: Id) -> Result<(), TreeError> { 396 | self.construct_simple( 397 | id.clone(), 398 | NodeData::Object { 399 | items: HashMap::new(), 400 | id, 401 | }, 402 | ) 403 | .map(|_| ()) 404 | } 405 | 406 | /// Constructs a new empty string within the `Tree`. Newly constructed values have no parent or 407 | /// place in the tree until placed with an `assign` call. 
408 | pub(super) fn construct_string(&mut self, id: Id) -> Result<(), TreeError> { 409 | let segment_id = self.next_id(); 410 | let string_id = self.construct_simple( 411 | id.clone(), 412 | NodeData::String { 413 | id, 414 | start: segment_id, 415 | end: segment_id, 416 | }, 417 | )?; 418 | self.nodes.insert( 419 | segment_id, 420 | Node { 421 | parent: Some(string_id), 422 | data: NodeData::StringSegment { 423 | contents: "".to_string(), 424 | ids: vec![], 425 | prev: string_id, 426 | next: string_id, 427 | }, 428 | }, 429 | ); 430 | Ok(()) 431 | } 432 | 433 | /// Constructs a new empty string within the `Tree`. Newly constructed values have no parent or 434 | /// place in the tree until placed with an `assign` call. 435 | pub(super) fn construct_array(&mut self, id: Id) -> Result<(), TreeError> { 436 | let segment_id = self.next_id(); 437 | let array_id = self.construct_simple( 438 | id.clone(), 439 | NodeData::Array { 440 | id, 441 | start: segment_id, 442 | end: segment_id, 443 | }, 444 | )?; 445 | self.nodes.insert( 446 | segment_id, 447 | Node { 448 | parent: Some(array_id), 449 | data: NodeData::ArraySegment { 450 | contents: vec![], 451 | ids: vec![], 452 | prev: array_id, 453 | next: array_id, 454 | }, 455 | }, 456 | ); 457 | Ok(()) 458 | } 459 | 460 | pub(super) fn next_id(&mut self) -> NodeId { 461 | let res = self.next_node; 462 | self.next_node.0 += 1; 463 | res 464 | } 465 | 466 | pub fn delete_orphans(&mut self) { 467 | for orphan in self.orphans.clone() { 468 | self.delete(orphan); 469 | } 470 | self.orphans = HashSet::new(); 471 | } 472 | 473 | /// Deletes a node and all its children. If you want to delete a single segment, try 474 | /// `delete_segment`. This operation is slow since it recurses on all sub-nodes; you may want 475 | /// to consider just moving a node into the tree's `orphan` list. 
476 | fn delete(&mut self, item: NodeId) { 477 | let mut queue = vec![item]; 478 | while let Some(item) = queue.pop() { 479 | let node = match self.nodes.remove(&item) { 480 | Some(v) => v, 481 | None => continue, 482 | }; 483 | match node.data { 484 | NodeData::Object { id, items } => { 485 | for (_, val) in items { 486 | match val { 487 | Child::Collection(id) => { 488 | queue.push(id); 489 | } 490 | // do nothing for other values; don't have any subchildren to delete 491 | Child::True | Child::False | Child::Null | Child::Int(_) => {} 492 | } 493 | } 494 | self.id_to_node.remove(&id).unwrap(); 495 | } 496 | NodeData::String { start, id, .. } => { 497 | queue.push(start); 498 | self.id_to_node.remove(&id).unwrap(); 499 | } 500 | NodeData::StringSegment { next, ids, .. } => { 501 | queue.push(next); 502 | for (id, _) in ids { 503 | self.id_to_node.remove(&id).unwrap(); 504 | } 505 | } 506 | NodeData::Array { start, id, .. } => { 507 | queue.push(start); 508 | self.id_to_node.remove(&id).unwrap(); 509 | } 510 | NodeData::ArraySegment { 511 | next, 512 | ids, 513 | contents, 514 | .. 
515 | } => { 516 | queue.push(next); 517 | for (id, _) in ids { 518 | self.id_to_node.remove(&id).unwrap(); 519 | } 520 | for item in contents { 521 | match item { 522 | Child::Collection(id) => { 523 | queue.push(id); 524 | } 525 | // do nothing for other values; don't have any subchildren to delete 526 | Child::True | Child::False | Child::Null | Child::Int(_) => {} 527 | } 528 | } 529 | } 530 | } 531 | } 532 | } 533 | 534 | fn move_to_orphan(&mut self, item: NodeId) { 535 | self.nodes[&item].parent = None; 536 | self.orphans.insert(item); 537 | } 538 | 539 | // has to recurse up parents to ensure we haven't made any cycles, unfortunately 540 | fn reparent_item(&mut self, item: NodeId, parent: NodeId) -> Result<(), TreeError> { 541 | if self.nodes[&item].parent.is_some() { 542 | return Err(TreeError::NodeAlreadyHadParent); 543 | } 544 | let mut next = Some(parent); 545 | while let Some(this) = next.take() { 546 | if this == item { 547 | return Err(TreeError::EditWouldCauseCycle); 548 | } 549 | next = self.nodes[&this].parent; 550 | } 551 | 552 | self.orphans.remove(&item).unwrap(); 553 | self.nodes[&item].parent = Some(parent); 554 | Ok(()) 555 | } 556 | 557 | pub(super) fn value_to_child(&self, value: &Value) -> Result, TreeError> { 558 | match value { 559 | Value::Object(value::ObjectRef(id)) 560 | | Value::Array(value::ArrayRef(id)) 561 | | Value::String(value::StringRef(id)) => { 562 | // TODO should we validate types here? 
563 | let node_id = self.id_to_node(&id)?; 564 | Ok(Some(Child::Collection(node_id))) 565 | } 566 | Value::True => Ok(Some(Child::True)), 567 | Value::False => Ok(Some(Child::False)), 568 | Value::Null => Ok(Some(Child::Null)), 569 | Value::Int(i) => Ok(Some(Child::Int(*i))), 570 | Value::Unset => Ok(None), 571 | } 572 | } 573 | 574 | pub(super) fn child_to_value(&self, child: Option<&Child>) -> Value { 575 | match child { 576 | None => Value::Unset, 577 | Some(Child::True) => Value::True, 578 | Some(Child::False) => Value::False, 579 | Some(Child::Null) => Value::Null, 580 | Some(Child::Int(i)) => Value::Int(*i), 581 | Some(Child::Collection(node_id)) => { 582 | let id = self.nodes[&node_id] 583 | .id() 584 | .expect("segment was somehow child of object?"); 585 | match self.get_type(id.clone()) { 586 | Ok(NodeType::String) => Value::String(value::StringRef(id)), 587 | Ok(NodeType::Object) => Value::Object(value::ObjectRef(id)), 588 | Ok(NodeType::Array) => Value::Array(value::ArrayRef(id)), 589 | _ => panic!("collection id did not have type of collection"), 590 | } 591 | } 592 | } 593 | } 594 | 595 | pub(super) fn id_to_node(&self, id: &Id) -> Result { 596 | self.id_to_node 597 | .get(id) 598 | .ok_or(TreeError::UnknownId) 599 | .map(|v| *v) 600 | } 601 | 602 | // TODO right now this is last-write-wins, could modify the object NodeData pretty lightly and 603 | // get multi value registers which would be sick 604 | /// Moves `value` to `object[key]`. If `value` is `None`, the key is deleted. If there was a 605 | /// previous collection assigned to this key, it is reparented into the tree's `orphan` list. 
606 | pub(super) fn object_assign( 607 | &mut self, 608 | object: Id, 609 | key: String, 610 | value: Value, 611 | ) -> Result, TreeError> { 612 | let child_opt = self.value_to_child(&value)?; 613 | let object_node_id = self.id_to_node(&object)?; 614 | if let Some(Child::Collection(child)) = &child_opt { 615 | self.reparent_item(*child, object_node_id)?; 616 | } 617 | match &mut self.nodes[&object_node_id].data { 618 | NodeData::Object { items, id: _ } => { 619 | let old = if let Some(child) = child_opt { 620 | items.insert(key, child) 621 | } else { 622 | items.remove(&key) 623 | }; 624 | if let Some(Child::Collection(old_id)) = old { 625 | self.move_to_orphan(old_id); 626 | } 627 | Ok(self.child_to_value(old.as_ref())) 628 | } 629 | _ => Err(TreeError::UnexpectedNodeType), 630 | } 631 | } 632 | 633 | /// Gets the type of `Id`. 634 | pub(super) fn get_type(&self, id: Id) -> Result { 635 | let node_id = self.id_to_node(&id)?; 636 | let node = self 637 | .nodes 638 | .get(&node_id) 639 | .expect("node_id listed in id_to_node did not exist."); 640 | match node.data { 641 | NodeData::Object { .. } => Ok(NodeType::Object), 642 | NodeData::String { .. } => Ok(NodeType::String), 643 | NodeData::StringSegment { .. } => Ok(NodeType::Character), 644 | NodeData::Array { .. } => Ok(NodeType::Array), 645 | NodeData::ArraySegment { .. 
} => Ok(NodeType::ArrayEntry), 646 | } 647 | } 648 | 649 | pub(super) fn get_parent(&self, id: Id) -> Result, TreeError> { 650 | let node_id = self.id_to_node(&id)?; 651 | let node = self 652 | .nodes 653 | .get(&node_id) 654 | .expect("node_id listed in id_to_node did not exist."); 655 | let parent_id = match node.parent { 656 | None => return Ok(None), 657 | Some(v) => v, 658 | }; 659 | let parent = self 660 | .nodes 661 | .get(&parent_id) 662 | .expect("node_id listed in id_to_node did not exist."); 663 | Ok(Some( 664 | parent 665 | .id() 666 | .expect("parent of node was a string segment somehow"), 667 | )) 668 | } 669 | 670 | /// Creates `character` in the tree with id `character_id`, and immediately inserts it after 671 | /// the character `append_id`. If `append_id` is the ID of a string instead of a character, 672 | /// `character` will be inserted at the beginning of the string. `append_id` may be a deleted 673 | /// character, if the tombstone is still in the tree. 674 | pub(super) fn insert_character( 675 | &mut self, 676 | append_id: Id, 677 | character_id: Id, 678 | character: char, 679 | ) -> Result<(), TreeError> { 680 | sequence::insert(self, append_id, character_id, |string_index, node| { 681 | match &mut node.data { 682 | NodeData::StringSegment { contents, .. } => { 683 | contents.insert(string_index, character); 684 | } 685 | _ => panic!("unknown object type!!"), 686 | } 687 | character.len_utf8() 688 | }) 689 | } 690 | 691 | /// Deletes the character with ID `char_id`. A tombstone is left in the string, allowing future 692 | /// `insert_character` calls to reference this `char_id` as their `append_id`. 693 | pub(super) fn delete_character(&mut self, char_id: Id) -> Result<(), TreeError> { 694 | sequence::delete(self, char_id, |string_index, node| match &mut node.data { 695 | NodeData::StringSegment { contents, .. 
} => { 696 | let deleted_char = contents.remove(string_index); 697 | deleted_char.len_utf8() 698 | } 699 | _ => panic!("unknown object type!!"), 700 | }) 701 | } 702 | 703 | /// Creates `character` in the tree with id `character_id`, and immediately inserts it after 704 | /// the character `append_id`. If `append_id` is the ID of a string instead of a character, 705 | /// `character` will be inserted at the beginning of the string. `append_id` may be a deleted 706 | /// character, if the tombstone is still in the tree. 707 | pub(super) fn insert_list_item( 708 | &mut self, 709 | append_id: Id, 710 | character_id: Id, 711 | value: Value, 712 | ) -> Result<(), TreeError> { 713 | // TODO need to better check for invalid input 714 | let child = match self.value_to_child(&value)? { 715 | Some(v) => v, 716 | None => return Ok(()), 717 | }; 718 | if let Child::Collection(child) = &child { 719 | let append_node = self.id_to_node(&append_id)?; 720 | match self.nodes[&append_node] { 721 | Node { 722 | data: NodeData::ArraySegment { .. }, 723 | parent, 724 | } => { 725 | self.reparent_item(*child, parent.unwrap())?; 726 | } 727 | Node { 728 | data: NodeData::Array { .. }, 729 | .. 730 | } => { 731 | self.reparent_item(*child, append_node)?; 732 | } 733 | _ => return Err(TreeError::UnexpectedNodeType), 734 | } 735 | } 736 | sequence::insert(self, append_id, character_id, |array_index, node| { 737 | match &mut node.data { 738 | NodeData::ArraySegment { contents, .. } => { 739 | contents.insert(array_index, child); 740 | } 741 | _ => panic!("unknown object type!!"), 742 | } 743 | 1 744 | }) 745 | } 746 | 747 | /// Deletes the item in the list with ID `item_id`. A tombstone is left in the string, allowing 748 | /// future `insert_character` calls to reference this `char_id` as their `append_id`. 
pub(super) fn delete_list_item(&mut self, item_id: Id) -> Result<Value<Id>, TreeError> {
    // The callback hands the removed entry back out through this slot.
    let mut child_opt = None;
    sequence::delete(self, item_id, |array_index, node| match &mut node.data {
        NodeData::ArraySegment { contents, .. } => {
            child_opt = Some(contents.remove(array_index));
            1
        }
        _ => panic!("unknown object type!!"),
    })?;
    // A removed collection stays in the tree as an orphan until `delete_orphans` runs.
    if let Some(Child::Collection(id)) = &child_opt {
        self.move_to_orphan(*id);
    }
    Ok(self.child_to_value(child_opt.as_ref()))
}
}

--------------------------------------------------------------------------------
/src/json/value.rs:
--------------------------------------------------------------------------------
use std::fmt::Debug;
use std::hash::Hash;

use super::tree;

// NOTE(review): every generic parameter list in this file was lost in extraction; the `<Id>`
// parameters and bounds below are reconstructed from usage - confirm against the repository.

/// A JSON value as seen from outside the tree. Collections are referred to by id; `Unset`
/// stands for "no value".
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Value<Id> {
    String(StringRef<Id>),
    Array(ArrayRef<Id>),
    Object(ObjectRef<Id>),
    Int(i64),
    True,
    False,
    Null,
    Unset,
}

/// The container a collection is placed in, if any.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Parent<Id> {
    Array(ArrayRef<Id>),
    Object(ObjectRef<Id>),
    None,
}

/// Resolves the parent of `id` to a typed `Parent`.
///
/// # Panics
/// Panics if the parent exists but is neither an array nor an object.
pub(super) fn get_parent<Id: Hash + Clone + Eq + Debug>(
    tree: &tree::Tree<Id>,
    id: &Id,
) -> Result<Parent<Id>, tree::TreeError> {
    let id = match tree.get_parent(id.clone())? {
        Some(v) => v,
        None => return Ok(Parent::None),
    };
    match tree.get_type(id.clone()) {
        Ok(tree::NodeType::Array) => Ok(Parent::Array(ArrayRef(id))),
        Ok(tree::NodeType::Object) => Ok(Parent::Object(ObjectRef(id))),
        e => panic!("parent was of unexpected type: {:?}", e),
    }
}

/// Reference to a string value in a tree, by id.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StringRef<Id>(pub Id);
impl<Id: Hash + Clone + Eq + Debug> StringRef<Id> {
    /// Concatenates the contents of every segment of the string, in order.
    ///
    /// Returns `UnknownId` if the id is not in the tree and `UnexpectedNodeType` if it does not
    /// refer to a string.
    pub fn to_string(&self, tree: &tree::Tree<Id>) -> Result<String, tree::TreeError> {
        let string_node_id = tree.id_to_node(&self.0)?;
        let node = tree
            .nodes
            .get(&string_node_id)
            .expect("node_id listed in id_to_node did not exist.");
        let mut next = match &node.data {
            tree::NodeData::String { start, .. } => *start,
            _ => return Err(tree::TreeError::UnexpectedNodeType),
        };
        let mut string = String::new();
        // The last segment's `next` points back at the container, which ends the walk.
        while next != string_node_id {
            let node = tree
                .nodes
                .get(&next)
                .expect("node_id listed in segment adjacency did not exist.");
            next = match &node.data {
                tree::NodeData::StringSegment { next, contents, .. } => {
                    string.push_str(contents);
                    *next
                }
                _ => panic!("debug_get_string called on non-string Id"),
            };
        }
        Ok(string)
    }

    /// Returns the index at the start of the string, which is the string's own id.
    pub fn start(&self, tree: &tree::Tree<Id>) -> Result<StringIndex<Id>, tree::TreeError> {
        // validate still in tree
        let _ = tree.id_to_node(&self.0)?;
        Ok(StringIndex(self.0.clone()))
    }

    /// Returns the index of the last id recorded in the string's final segment. When that
    /// segment has no ids (for example a newly created, empty string) the string's own id is
    /// returned, matching `start`, instead of panicking.
    pub fn end(&self, tree: &tree::Tree<Id>) -> Result<StringIndex<Id>, tree::TreeError> {
        let node_id = tree.id_to_node(&self.0)?;
        let node = tree
            .nodes
            .get(&node_id)
            .expect("node_id listed in id_to_node did not exist.");
        let last_node_id = match &node.data {
            tree::NodeData::String { end, .. } => *end,
            _ => return Err(tree::TreeError::UnexpectedNodeType),
        };
        let last_node = tree
            .nodes
            .get(&last_node_id)
            .expect("node_id listed in segment adjacency did not exist.");
        match &last_node.data {
            tree::NodeData::StringSegment { ids, .. } => {
                // NOTE(review): assumes only an empty string has an id-less final segment.
                let id = ids
                    .last()
                    .map(|(id, _)| id.clone())
                    .unwrap_or_else(|| self.0.clone());
                Ok(StringIndex(id))
            }
            _ => Err(tree::TreeError::UnexpectedNodeType),
        }
    }

    /// Returns the container this string is placed in, if any.
    pub fn parent(&self, tree: &tree::Tree<Id>) -> Result<Parent<Id>, tree::TreeError> {
        get_parent(tree, &self.0)
    }
}

/// A position in a string: either a character id, or the string's own id for the position
/// before the first character.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StringIndex<Id>(pub Id);
impl<Id: Hash + Clone + Eq + Debug> StringIndex<Id> {
    /// Returns the string this index belongs to.
    pub fn parent(&self, tree: &tree::Tree<Id>) -> Result<StringRef<Id>, tree::TreeError> {
        match tree.get_type(self.0.clone()) {
            Ok(tree::NodeType::String) => Ok(StringRef(self.0.clone())),
            Ok(tree::NodeType::Character) => Ok(StringRef(
                tree.get_parent(self.0.clone())?
                    .expect("Stringsegment should have parent"),
            )),
            Ok(_) => Err(tree::TreeError::UnexpectedNodeType),
            Err(e) => Err(e),
        }
    }

    /// Steps one live character forwards or backwards from this index, skipping tombstones.
    /// Stepping backwards past the first character yields the string's own id; stepping
    /// forwards past the end yields the last id visited.
    fn adjacent_next(
        &self,
        tree: &tree::Tree<Id>,
        backwards: bool,
    ) -> Result<StringIndex<Id>, tree::TreeError> {
        let node_id = tree.id_to_node(&self.0)?;
        let mut this_node = tree
            .nodes
            .get(&node_id)
            .expect("node_id listed in id_to_node did not exist.");
        let mut this_index = match &this_node.data {
            tree::NodeData::StringSegment { ids, .. } => ids
                .iter()
                .position(|(id, _)| id == &self.0)
                .expect("id mapped to a segment that does not list it"),
            tree::NodeData::String { .. } => 0,
            _ => return Err(tree::TreeError::UnexpectedNodeType),
        };

        loop {
            // Pick the next position: another slot in the same segment when one exists
            // (`Some(index)`), otherwise the neighbouring node (`None`).
            let (next_node, next_index) = match &this_node.data {
                tree::NodeData::String { start, .. } => {
                    if backwards {
                        // started at start of string and going backwards; return self
                        return Ok(self.clone());
                    }
                    (tree.nodes.get(start).unwrap(), None)
                }
                tree::NodeData::StringSegment {
                    ids, next, prev, ..
                } => {
                    if (backwards && this_index > 0) || (!backwards && this_index + 1 < ids.len()) {
                        (
                            this_node,
                            Some(if backwards {
                                this_index - 1
                            } else {
                                this_index + 1
                            }),
                        )
                    } else if backwards {
                        (tree.nodes.get(prev).unwrap(), None)
                    } else {
                        (tree.nodes.get(next).unwrap(), None)
                    }
                }
                _ => return Err(tree::TreeError::UnexpectedNodeType),
            };

            match (&this_node.data, &next_node.data) {
                (tree::NodeData::String { .. }, tree::NodeData::String { .. }) => {
                    panic!("invalid string data structure")
                }
                (tree::NodeData::StringSegment { ids, .. }, tree::NodeData::String { id, .. }) => {
                    // hit edge of string; return
                    if backwards {
                        return Ok(StringIndex(id.clone()));
                    } else {
                        // A segment without ids (an empty string) has nothing to point at,
                        // so stay on the starting index rather than indexing out of bounds.
                        return Ok(ids
                            .get(this_index)
                            .map(|(id, _)| StringIndex(id.clone()))
                            .unwrap_or_else(|| self.clone()));
                    }
                }
                (_, tree::NodeData::StringSegment { ids, .. }) => {
                    this_node = next_node;
                    // Entering a new segment: start at its far end when moving backwards,
                    // at its first slot otherwise.
                    this_index = next_index.unwrap_or(if backwards && !ids.is_empty() {
                        ids.len() - 1
                    } else {
                        0
                    });
                    // Stop on the first live character; tombstones are stepped over.
                    if !ids.is_empty() && ids[this_index].1.is_some() {
                        return Ok(StringIndex(ids[this_index].0.clone()));
                    }
                }
                _ => panic!("invalid node types"),
            }
        }
    }

    /// Returns `true` if this index refers to a live character or to the string itself, and
    /// `false` if it is a tombstone, is no longer in the tree, or is not part of a string.
    pub fn still_exists(&self, tree: &tree::Tree<Id>) -> bool {
        let node_id = match tree.id_to_node(&self.0) {
            Ok(v) => v,
            Err(_) => return false,
        };
        let this_node = tree
            .nodes
            .get(&node_id)
            .expect("node_id listed in id_to_node did not exist.");
        match &this_node.data {
            tree::NodeData::StringSegment { ids, .. } => ids
                .iter()
                .any(|(id, live)| id == &self.0 && live.is_some()),
            tree::NodeData::String { .. } => true,
            _ => false,
        }
    }

    /// Returns the index that is `num` characters away from `self`. If reaches start or end of
    /// string, will stop. Takes `O(n)`; may take longer if there are a lot of deleted characters
    /// to traverse over.
    pub fn adjacent(
        &self,
        tree: &tree::Tree<Id>,
        mut num: i64,
    ) -> Result<StringIndex<Id>, tree::TreeError> {
        if num > 0 && !self.still_exists(tree) {
            // character doesn't exist and we're moving forward; so add one to number so that
            // we resolve the character to its true position
            num += 1;
        }

        let mut i = self.clone();

        while num != 0 {
            if num > 0 {
                i = i.adjacent_next(tree, false)?;
                num -= 1;
            } else {
                i = i.adjacent_next(tree, true)?;
                num += 1;
            }
        }

        Ok(i)
    }
}

/// Reference to an array value in a tree, by id.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ArrayRef<Id>(pub Id);
impl<Id: Hash + Clone + Eq + Debug> ArrayRef<Id> {
    /// Collects the contents of every segment of the array, in order, as `Value`s.
    ///
    /// Returns `UnknownId` if the id is not in the tree and `UnexpectedNodeType` if it does not
    /// refer to an array.
    pub fn to_vec(&self, tree: &tree::Tree<Id>) -> Result<Vec<Value<Id>>, tree::TreeError> {
        let array_node_id = tree.id_to_node(&self.0)?;
        let node = tree
            .nodes
            .get(&array_node_id)
            .expect("node_id listed in id_to_node did not exist.");
        let mut next = match &node.data {
            tree::NodeData::Array { start, .. } => *start,
            _ => return Err(tree::TreeError::UnexpectedNodeType),
        };
        let mut values = Vec::new();
        // The last segment's `next` points back at the container, which ends the walk.
        while next != array_node_id {
            let node = tree
                .nodes
                .get(&next)
                .expect("node_id listed in segment adjacency did not exist.");
            next = match &node.data {
                tree::NodeData::ArraySegment { next, contents, .. } => {
                    values.extend(contents.iter().map(|child| tree.child_to_value(Some(child))));
                    *next
                }
                _ => panic!("to_vec found a non-array-segment node in the segment chain"),
            };
        }
        Ok(values)
    }

    /// Returns the container this array is placed in, if any.
    pub fn parent(&self, tree: &tree::Tree<Id>) -> Result<Parent<Id>, tree::TreeError> {
        get_parent(tree, &self.0)
    }
}

/// A position in an array: either an entry id, or the array's own id.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ArrayIndex<Id>(pub Id);
impl<Id: Hash + Clone + Eq + Debug> ArrayIndex<Id> {
    /// Returns the array this index belongs to.
    pub fn parent(&self, tree: &tree::Tree<Id>) -> Result<ArrayRef<Id>, tree::TreeError> {
        match tree.get_type(self.0.clone()) {
            Ok(tree::NodeType::Array) => Ok(ArrayRef(self.0.clone())),
            Ok(tree::NodeType::ArrayEntry) => Ok(ArrayRef(
                tree.get_parent(self.0.clone())?
                    .expect("arraysegment should have parent"),
            )),
            Ok(_) => Err(tree::TreeError::UnexpectedNodeType),
            Err(e) => Err(e),
        }
    }
}

/// Reference to an object value in a tree, by id.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ObjectRef<Id>(pub Id);
impl<Id: Hash + Clone + Eq + Debug> ObjectRef<Id> {
    /// Returns the container this object is placed in, if any.
    pub fn parent(&self, tree: &tree::Tree<Id>) -> Result<Parent<Id>, tree::TreeError> {
        get_parent(tree, &self.0)
    }

    /// Returns the value stored under `key`, or `Value::Unset` when the key is absent.
    pub fn get(&self, tree: &tree::Tree<Id>, key: &str) -> Result<Value<Id>, tree::TreeError> {
        let object_node_id = tree.id_to_node(&self.0)?;
        let child = match &tree.nodes[&object_node_id].data {
            tree::NodeData::Object { items, .. } => items.get(key),
            _ => return Err(tree::TreeError::UnexpectedNodeType),
        };
        Ok(tree.child_to_value(child))
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
pub mod json;
pub mod opset;

mod doc;
pub use doc::*;

--------------------------------------------------------------------------------
/src/opset.rs:
/// An edit that knows how to apply itself to a state of type `State`.
pub trait Operation<State> {
    /// Mutates `tree` in place so that it reflects this operation.
    fn apply(&self, tree: &mut State);
}

/// A sorted log of operations plus a sparse cache of the states obtained by
/// replaying that log from the initial state.
///
/// Operations may be added in any order: each one is inserted at its sorted
/// position and only the cached states past that position are recomputed.
pub struct Opset<E: Operation<S> + Ord, S: Clone> {
    /// list of all ops applied to this tree, kept sorted according to `Ord`
    ops: Vec<E>,
    /// a list of (num ops applied, state of tree at that point in time)
    states: Vec<(usize, S)>,
    /// at most, this many ops will be skipped over in the states cache
    cache_gap: usize,
}

impl<E: Operation<S> + Ord, S: Clone> Opset<E, S> {
    /// Creates an opset that holds no operations and whose current state is
    /// `initial_state`.
    ///
    /// `cache_gap` bounds how many ops may lie between two consecutive cached
    /// states.
    pub fn new(initial_state: S, cache_gap: usize) -> Self {
        Opset {
            ops: Vec::new(),
            cache_gap,
            states: vec![(0, initial_state)],
        }
    }

    /// Adds a single operation and brings the cached states up to date.
    ///
    /// # Panics
    ///
    /// Panics if an operation comparing equal to `edit` is already present.
    pub fn update(&mut self, edit: E) {
        let insert_point = self.insert_sorted(edit);
        self.recalculate(insert_point);
    }

    /// Adds every operation yielded by `ops`, then recalculates the cached
    /// states once, starting from the earliest position that was touched.
    ///
    /// Accepts anything that is `IntoIterator`, so both collections and
    /// ready-made iterators can be passed. An empty input leaves the opset
    /// untouched.
    ///
    /// # Panics
    ///
    /// Panics if any yielded operation compares equal to one already present
    /// (including one inserted earlier in the same call).
    pub fn update_from_iter<I: IntoIterator<Item = E>>(&mut self, ops: I) {
        let mut least_insert_point: Option<usize> = None;
        for edit in ops {
            let insert_point = self.insert_sorted(edit);
            // An insertion at index `p` leaves the prefix `[0, p)` untouched, so the
            // smallest index seen is the first position whose history changed.
            least_insert_point =
                Some(least_insert_point.map_or(insert_point, |prev| prev.min(insert_point)));
        }
        if let Some(least_insert_point) = least_insert_point {
            self.recalculate(least_insert_point);
        }
    }

    /// Inserts `edit` at its sorted position in `ops` and returns that position.
    fn insert_sorted(&mut self, edit: E) -> usize {
        let insert_point = self
            .ops
            .binary_search(&edit)
            .expect_err("two ops had the same timestamp");
        self.ops.insert(insert_point, edit);
        insert_point
    }

    /// Recalculates states after the edit list has been changed. The first `insert_point`
    /// ops should be identical to the last time `recalculate` was called.
    fn recalculate(&mut self, insert_point: usize) {
        let index_of_first_bad_state =
            match self.states.binary_search_by_key(&insert_point, |(n, _)| *n) {
                // a state taken after exactly `insert_point` ops is still valid; keep it
                Ok(n) => n + 1,
                Err(n) => n,
            };
        // drop every cached state that was computed from ops past the insert point
        self.states.truncate(index_of_first_bad_state);
        // The entry for zero applied ops always survives the truncation above, so
        // there is always a state to resume from.
        let (mut applied_ops, mut state) = self
            .states
            .pop()
            .expect("state cache always retains the initial state");
        while applied_ops < self.ops.len() {
            let cache_due = self
                .states
                .last()
                .map_or(true, |(cached_at, _)| *cached_at + self.cache_gap <= applied_ops);
            if cache_due {
                // time to insert a new cache
                self.states.push((applied_ops, state.clone()));
            }
            self.ops[applied_ops].apply(&mut state);
            applied_ops += 1;
        }
        // the fully up-to-date state is always stored last
        self.states.push((applied_ops, state));
    }

    /// Returns the state obtained after applying every operation in order.
    pub fn state(&self) -> &S {
        &self
            .states
            .last()
            .expect("somehow state cache was empty?")
            .1
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[derive(PartialOrd, Ord, Debug, Clone, Eq, PartialEq)]
    struct TestEdit {
        timestamp: usize,
        value: usize,
    }

    impl Operation<Vec<usize>> for TestEdit {
        fn apply(&self, state: &mut Vec<usize>) {
            state.push(self.value);
        }
    }

    #[test]
    fn various_ops_work() {
        let mut crdt = Opset::new(vec![0], 2);
        // initial edit
        crdt.update(TestEdit {
            timestamp: 10,
            value: 1,
        });
        assert_eq!(crdt.state(), &[0, 1]);
        assert_eq!(crdt.states.len(), 2);

        // edit before start
        crdt.update(TestEdit {
            timestamp: 5,
            value: 2,
        });
        assert_eq!(crdt.state(), &[0, 2, 1]);
        assert_eq!(crdt.states.len(), 2);

        // edit at end
        crdt.update(TestEdit {
            timestamp: 15,
            value: 3,
        });
        assert_eq!(crdt.state(), &[0, 2, 1, 3]);
        assert_eq!(crdt.states.len(), 3);

        // edit in middle
        crdt.update(TestEdit {
            timestamp: 12,
            value: 4,
        });
        assert_eq!(crdt.state(), &[0, 2, 1, 4, 3]);
        assert_eq!(crdt.states.len(), 3);

        // one more edit
        crdt.update(TestEdit {
            timestamp: 11,
            value: 5,
        });
        assert_eq!(crdt.state(), &[0, 2, 1, 5, 4, 3]);
        assert_eq!(crdt.states.len(), 4);
    }

    #[test]
    fn various_ops_work_with_iter() {
        let mut crdt = Opset::new(vec![0], 2);

        let ops = vec![
            TestEdit {
                timestamp: 10,
                value: 1,
            },
            TestEdit {
                timestamp: 5,
                value: 2,
            },
            TestEdit {
                timestamp: 15,
                value: 3,
            },
            TestEdit {
                timestamp: 12,
                value: 4,
            },
            TestEdit {
                timestamp: 11,
                value: 5,
            },
        ];
        crdt.update_from_iter(ops.into_iter());
        assert_eq!(crdt.state(), &[0, 2, 1, 5, 4, 3]);
        assert_eq!(crdt.states.len(), 4);
    }
}