├── LICENSE ├── README.md ├── list.go ├── list_test.go ├── map.go ├── map_test.go └── profile.sh /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Michael Hendricks 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ps 2 | == 3 | 4 | Persistent data structures for Go. See the [full package documentation](http://godoc.org/github.com/mndrix/ps) 5 | 6 | Install with 7 | 8 | go get github.com/mndrix/ps 9 | -------------------------------------------------------------------------------- /list.go: -------------------------------------------------------------------------------- 1 | package ps 2 | 3 | // List is a persistent list of possibly heterogenous values. 4 | type List interface { 5 | // IsNil returns true if the list is empty 6 | IsNil() bool 7 | 8 | // Cons returns a new list with val as the head 9 | Cons(val interface{}) List 10 | 11 | // Head returns the first element of the list; 12 | // panics if the list is empty 13 | Head() interface{} 14 | 15 | // Tail returns a list with all elements except the head; 16 | // panics if the list is empty 17 | Tail() List 18 | 19 | // Size returns the list's length. This takes O(1) time. 20 | Size() int 21 | 22 | // ForEach executes a callback for each value in the list. 23 | ForEach(f func(interface{})) 24 | 25 | // Reverse returns a list whose elements are in the opposite order as 26 | // the original list. 27 | Reverse() List 28 | } 29 | 30 | // Immutable (i.e. persistent) list 31 | type list struct { 32 | depth int // the number of nodes after, and including, this one 33 | value interface{} 34 | tail *list 35 | } 36 | 37 | // An empty list shared by all lists 38 | var nilList = &list{} 39 | 40 | // NewList returns a new, empty list. The result is a singly linked 41 | // list implementation. All lists share an empty tail, so allocating 42 | // empty lists is efficient in time and memory. 43 | func NewList() List { 44 | return nilList 45 | } 46 | 47 | func (self *list) IsNil() bool { 48 | return self == nilList 49 | } 50 | 51 | func (self *list) Size() int { 52 | return self.depth 53 | } 54 | 55 | func (tail *list) Cons(val interface{}) List { 56 | var xs list 57 | xs.depth = tail.depth + 1 58 | xs.value = val 59 | xs.tail = tail 60 | return &xs 61 | } 62 | 63 | func (self *list) Head() interface{} { 64 | if self.IsNil() { 65 | panic("Called Head() on an empty list") 66 | } 67 | 68 | return self.value 69 | } 70 | 71 | func (self *list) Tail() List { 72 | if self.IsNil() { 73 | panic("Called Tail() on an empty list") 74 | } 75 | 76 | return self.tail 77 | } 78 | 79 | // ForEach executes a callback for each value in the list 80 | func (self *list) ForEach(f func(interface{})) { 81 | if self.IsNil() { 82 | return 83 | } 84 | f(self.Head()) 85 | self.Tail().ForEach(f) 86 | } 87 | 88 | // Reverse returns a list with elements in opposite order as this list 89 | func (self *list) Reverse() List { 90 | reversed := NewList() 91 | self.ForEach(func(v interface{}) { reversed = reversed.Cons(v) }) 92 | return reversed 93 | } 94 | -------------------------------------------------------------------------------- /list_test.go: -------------------------------------------------------------------------------- 1 | package ps 2 | 3 | import "testing" 4 | 5 | func TestListImmutable(t *testing.T) { 6 | // build some lists 7 | one := NewList().Cons("first") 8 | two := one.Cons("second") 9 | zwei := one.Cons("zweite") 10 | 11 | // check each list's length 12 | if size := one.Size(); size != 1 { 13 | t.Errorf("one doesn't have 1 item, it has %d", size) 14 | } 15 | if size := two.Size(); size != 2 { 16 | t.Errorf("two doesn't have 2 items, it has %d", size) 17 | } 18 | if size := zwei.Size(); size != 2 { 19 | t.Errorf("zwei doesn't have 2 item, it has %d", size) 20 | } 21 | 22 | // check each list's contents 23 | if one.Head() != "first" { 24 | t.Errorf("one has the wrong head") 25 | } 26 | if two.Head() != "second" { 27 | t.Errorf("two has the wrong head") 28 | } 29 | if two.Tail().Head() != "first" { 30 | t.Errorf("two has the wrong ending") 31 | } 32 | if zwei.Head() != "zweite" { 33 | t.Errorf("zwei has the wrong head") 34 | } 35 | if zwei.Tail().Head() != "first" { 36 | t.Errorf("zwei has the wrong ending") 37 | } 38 | } 39 | 40 | // benchmark making a really long list 41 | func BenchmarkListCons(b *testing.B) { 42 | l := NewList() 43 | for i := 0; i < b.N; i++ { 44 | l = l.Cons(i) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /map.go: -------------------------------------------------------------------------------- 1 | // Fully persistent data structures. A persistent data structure is a data 2 | // structure that always preserves the previous version of itself when 3 | // it is modified. Such data structures are effectively immutable, 4 | // as their operations do not update the structure in-place, but instead 5 | // always yield a new structure. 6 | // 7 | // Persistent 8 | // data structures typically share structure among themselves. This allows 9 | // operations to avoid copying the entire data structure. 10 | package ps 11 | 12 | import ( 13 | "bytes" 14 | "fmt" 15 | "unsafe" 16 | ) 17 | 18 | // A Map associates unique keys with values. 19 | type Map interface { 20 | // IsNil returns true if the Map is empty 21 | IsNil() bool 22 | 23 | // Set returns a new map in which key and value are associated. 24 | // If the key didn't exist before, it's created; otherwise, the 25 | // associated value is changed. 26 | // This operation is O(log N) in the number of keys. 27 | Set(key string, value interface{}) Map 28 | 29 | // UnsafeMutableSet returns the same map in which key and value are associated 30 | // in-place. If the key didn't exist before, it's created; otherwise, the 31 | // associated value is changed. 32 | // This operation is O(log N) in the number of keys. 33 | // Only use UnsafeMutableSet if you are the only reference-holder of the Map. 34 | UnsafeMutableSet(key string, value interface{}) Map 35 | 36 | // Delete returns a new map with the association for key, if any, removed. 37 | // This operation is O(log N) in the number of keys. 38 | Delete(key string) Map 39 | 40 | // Lookup returns the value associated with a key, if any. If the key 41 | // exists, the second return value is true; otherwise, false. 42 | // This operation is O(log N) in the number of keys. 43 | Lookup(key string) (interface{}, bool) 44 | 45 | // Size returns the number of key value pairs in the map. 46 | // This takes O(1) time. 47 | Size() int 48 | 49 | // ForEach executes a callback on each key value pair in the map. 50 | ForEach(f func(key string, val interface{})) 51 | 52 | // Keys returns a slice with all keys in this map. 53 | // This operation is O(N) in the number of keys. 54 | Keys() []string 55 | 56 | String() string 57 | } 58 | 59 | // Immutable (i.e. persistent) associative array 60 | const childCount = 8 61 | const shiftSize = 3 62 | 63 | type tree struct { 64 | count int 65 | hash uint64 // hash of the key (used for tree balancing) 66 | key string 67 | value interface{} 68 | children [childCount]*tree 69 | } 70 | 71 | var nilMap = &tree{} 72 | 73 | // Recursively set nilMap's subtrees to point at itself. 74 | // This eliminates all nil pointers in the map structure. 75 | // All map nodes are created by cloning this structure so 76 | // they avoid the problem too. 77 | func init() { 78 | for i := range nilMap.children { 79 | nilMap.children[i] = nilMap 80 | } 81 | } 82 | 83 | // NewMap allocates a new, persistent map from strings to values of 84 | // any type. 85 | // This is currently implemented as a path-copying binary tree. 86 | func NewMap() Map { 87 | return nilMap 88 | } 89 | 90 | func (self *tree) IsNil() bool { 91 | return self == nilMap 92 | } 93 | 94 | // clone returns an exact duplicate of a tree node 95 | func (self *tree) clone() *tree { 96 | var m tree 97 | m = *self 98 | return &m 99 | } 100 | 101 | // constants for FNV-1a hash algorithm 102 | const ( 103 | offset64 uint64 = 14695981039346656037 104 | prime64 uint64 = 1099511628211 105 | ) 106 | 107 | type unsafeString struct { 108 | Data uintptr 109 | Len int 110 | } 111 | 112 | type unsafeSlice struct { 113 | Data uintptr 114 | Len int 115 | Cap int 116 | } 117 | 118 | var zeroByteSlice = []byte{} 119 | 120 | // bytesView returns a view of the string as a []byte. 121 | // It doesn't incur allocation and copying caused by conversion but it's 122 | // unsafe, use with care. 123 | func bytesView(v string) []byte { 124 | if len(v) == 0 { 125 | return zeroByteSlice 126 | } 127 | 128 | sx := (*unsafeString)(unsafe.Pointer(&v)) 129 | bx := unsafeSlice{sx.Data, sx.Len, sx.Len} 130 | return *(*[]byte)(unsafe.Pointer(&bx)) 131 | } 132 | 133 | // hashKey returns a hash code for a given string 134 | func hashKey(key string) uint64 { 135 | hash := offset64 136 | 137 | for _, b := range bytesView(key) { 138 | hash ^= uint64(b) 139 | hash *= prime64 140 | } 141 | return hash 142 | } 143 | 144 | // Set returns a new map similar to this one but with key and value 145 | // associated. If the key didn't exist, it's created; otherwise, the 146 | // associated value is changed. 147 | func (self *tree) Set(key string, value interface{}) Map { 148 | hash := hashKey(key) 149 | return setLowLevel(self, hash, hash, key, value) 150 | } 151 | 152 | func setLowLevel(self *tree, partialHash, hash uint64, key string, value interface{}) *tree { 153 | if self.IsNil() { // an empty tree is easy 154 | m := self.clone() 155 | m.count = 1 156 | m.hash = hash 157 | m.key = key 158 | m.value = value 159 | return m 160 | } 161 | 162 | if hash != self.hash { 163 | m := self.clone() 164 | i := partialHash % childCount 165 | m.children[i] = setLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value) 166 | // update count if we added a new object 167 | if m.children[i].count > self.children[i].count { 168 | m.count++ 169 | } 170 | return m 171 | } 172 | 173 | // did we find a hash collision? 174 | if key != self.key { 175 | oops := fmt.Sprintf("Hash collision between: '%s' and '%s'. Please report to https://github.com/mndrix/ps/issues/new", self.key, key) 176 | panic(oops) 177 | } 178 | 179 | // replacing a key's previous value 180 | m := self.clone() 181 | m.value = value 182 | return m 183 | } 184 | 185 | // UnsafeMutableSet is the in-place mutable version of Set. Only use if 186 | // you are the only reference-holder of the Map. 187 | func (self *tree) UnsafeMutableSet(key string, value interface{}) Map { 188 | hash := hashKey(key) 189 | return mutableSetLowLevel(self, hash, hash, key, value) 190 | } 191 | 192 | func mutableSetLowLevel(self *tree, partialHash, hash uint64, key string, value interface{}) *tree { 193 | if self.IsNil() { // an empty tree is easy 194 | m := self.clone() 195 | m.count = 1 196 | m.hash = hash 197 | m.key = key 198 | m.value = value 199 | return m 200 | } 201 | 202 | if hash != self.hash { 203 | i := partialHash % childCount 204 | oldChildCount := self.children[i].count 205 | self.children[i] = mutableSetLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value) 206 | // update count if we added a new object 207 | if oldChildCount < self.children[i].count { 208 | self.count++ 209 | } 210 | return self 211 | } 212 | 213 | // did we find a hash collision? 214 | if key != self.key { 215 | oops := fmt.Sprintf("Hash collision between: '%s' and '%s'. Please report to https://github.com/mndrix/ps/issues/new", self.key, key) 216 | panic(oops) 217 | } 218 | 219 | // replacing a key's previous value 220 | self.value = value 221 | return self 222 | } 223 | 224 | // modifies a map by recalculating its key count based on the counts 225 | // of its subtrees 226 | func recalculateCount(m *tree) { 227 | count := 0 228 | for _, t := range m.children { 229 | count += t.Size() 230 | } 231 | m.count = count + 1 // add one to count ourself 232 | } 233 | 234 | func (m *tree) Delete(key string) Map { 235 | hash := hashKey(key) 236 | newMap, _ := deleteLowLevel(m, hash, hash) 237 | return newMap 238 | } 239 | 240 | func deleteLowLevel(self *tree, partialHash, hash uint64) (*tree, bool) { 241 | // empty trees are easy 242 | if self.IsNil() { 243 | return self, false 244 | } 245 | 246 | if hash != self.hash { 247 | i := partialHash % childCount 248 | child, found := deleteLowLevel(self.children[i], partialHash>>shiftSize, hash) 249 | if !found { 250 | return self, false 251 | } 252 | newMap := self.clone() 253 | newMap.children[i] = child 254 | recalculateCount(newMap) 255 | return newMap, true // ? this wasn't in the original code 256 | } 257 | 258 | // we must delete our own node 259 | if self.isLeaf() { // we have no children 260 | return nilMap, true 261 | } 262 | /* 263 | if self.subtreeCount() == 1 { // only one subtree 264 | for _, t := range self.children { 265 | if t != nilMap { 266 | return t, true 267 | } 268 | } 269 | panic("Tree with 1 subtree actually had no subtrees") 270 | } 271 | */ 272 | 273 | // find a node to replace us 274 | i := -1 275 | size := -1 276 | for j, t := range self.children { 277 | if t.Size() > size { 278 | i = j 279 | size = t.Size() 280 | } 281 | } 282 | 283 | // make chosen leaf smaller 284 | replacement, child := self.children[i].deleteLeftmost() 285 | newMap := replacement.clone() 286 | for j := range self.children { 287 | if j == i { 288 | newMap.children[j] = child 289 | } else { 290 | newMap.children[j] = self.children[j] 291 | } 292 | } 293 | recalculateCount(newMap) 294 | return newMap, true 295 | } 296 | 297 | // delete the leftmost node in a tree returning the node that 298 | // was deleted and the tree left over after its deletion 299 | func (m *tree) deleteLeftmost() (*tree, *tree) { 300 | if m.isLeaf() { 301 | return m, nilMap 302 | } 303 | 304 | for i, t := range m.children { 305 | if t != nilMap { 306 | deleted, child := t.deleteLeftmost() 307 | newMap := m.clone() 308 | newMap.children[i] = child 309 | recalculateCount(newMap) 310 | return deleted, newMap 311 | } 312 | } 313 | panic("Tree isn't a leaf but also had no children. How does that happen?") 314 | } 315 | 316 | // isLeaf returns true if this is a leaf node 317 | func (m *tree) isLeaf() bool { 318 | return m.Size() == 1 319 | } 320 | 321 | // returns the number of child subtrees we have 322 | func (m *tree) subtreeCount() int { 323 | count := 0 324 | for _, t := range m.children { 325 | if t != nilMap { 326 | count++ 327 | } 328 | } 329 | return count 330 | } 331 | 332 | func (m *tree) Lookup(key string) (interface{}, bool) { 333 | hash := hashKey(key) 334 | return lookupLowLevel(m, hash, hash) 335 | } 336 | 337 | func lookupLowLevel(self *tree, partialHash, hash uint64) (interface{}, bool) { 338 | if self.IsNil() { // an empty tree is easy 339 | return nil, false 340 | } 341 | 342 | if hash != self.hash { 343 | i := partialHash % childCount 344 | return lookupLowLevel(self.children[i], partialHash>>shiftSize, hash) 345 | } 346 | 347 | // we found it 348 | return self.value, true 349 | } 350 | 351 | func (m *tree) Size() int { 352 | return m.count 353 | } 354 | 355 | func (m *tree) ForEach(f func(key string, val interface{})) { 356 | if m.IsNil() { 357 | return 358 | } 359 | 360 | // ourself 361 | f(m.key, m.value) 362 | 363 | // children 364 | for _, t := range m.children { 365 | if t != nilMap { 366 | t.ForEach(f) 367 | } 368 | } 369 | } 370 | 371 | func (m *tree) Keys() []string { 372 | keys := make([]string, m.Size()) 373 | i := 0 374 | m.ForEach(func(k string, v interface{}) { 375 | keys[i] = k 376 | i++ 377 | }) 378 | return keys 379 | } 380 | 381 | // make it easier to display maps for debugging 382 | func (m *tree) String() string { 383 | keys := m.Keys() 384 | buf := bytes.NewBufferString("{") 385 | for _, key := range keys { 386 | val, _ := m.Lookup(key) 387 | fmt.Fprintf(buf, "%s: %s, ", key, val) 388 | } 389 | fmt.Fprintf(buf, "}\n") 390 | return buf.String() 391 | } 392 | -------------------------------------------------------------------------------- /map_test.go: -------------------------------------------------------------------------------- 1 | package ps 2 | 3 | import . "strconv" 4 | 5 | import "testing" 6 | import "sort" 7 | 8 | func TestMapNil(t *testing.T) { 9 | m := NewMap() 10 | keys := m.Keys() 11 | if len(keys) != 0 { 12 | t.Errorf("Empty map has keys") 13 | } 14 | } 15 | 16 | func TestMapImmutable(t *testing.T) { 17 | // build a couple small maps 18 | world := NewMap().Set("hello", "world") 19 | kids := world.Set("hello", "kids") 20 | 21 | // both maps should still retain their data 22 | if v, _ := world.Lookup("hello"); v != "world" { 23 | t.Errorf("Set() modified the receiving map") 24 | } 25 | if size := world.Size(); size != 1 { 26 | t.Errorf("world size is not 1 : %d", size) 27 | } 28 | if v, _ := kids.Lookup("hello"); v != "kids" { 29 | t.Errorf("Set() did not modify the resulting map") 30 | } 31 | if size := kids.Size(); size != 1 { 32 | t.Errorf("kids size is not 1 : %d", size) 33 | } 34 | 35 | // both maps have the right keys 36 | if keys := world.Keys(); len(keys) != 1 || keys[0] != "hello" { 37 | t.Errorf("world has the wrong keys: %#v", keys) 38 | } 39 | if keys := kids.Keys(); len(keys) != 1 || keys[0] != "hello" { 40 | t.Errorf("kids has the wrong keys: %#v", keys) 41 | } 42 | 43 | // test deletion 44 | empty := kids.Delete("hello") 45 | if size := empty.Size(); size != 0 { 46 | t.Errorf("empty size is not 1 : %d", size) 47 | } 48 | if keys := empty.Keys(); len(keys) != 0 { 49 | t.Errorf("empty has the wrong keys: %#v", keys) 50 | } 51 | } 52 | 53 | func TestMapMultipleKeys(t *testing.T) { 54 | // map with multiple keys each with pointer values 55 | one := 1 56 | two := 2 57 | three := 3 58 | m := NewMap().Set("one", &one).Set("two", &two).Set("three", &three) 59 | 60 | // do we have the right number of keys? 61 | keys := m.Keys() 62 | if len(keys) != 3 { 63 | t.Logf("wrong size keys: %d", len(keys)) 64 | t.FailNow() 65 | } 66 | 67 | // do we have the right keys? 68 | sort.Strings(keys) 69 | if keys[0] != "one" { 70 | t.Errorf("unexpected key: %s", keys[0]) 71 | } 72 | if keys[1] != "three" { 73 | t.Errorf("unexpected key: %s", keys[1]) 74 | } 75 | if keys[2] != "two" { 76 | t.Errorf("unexpected key: %s", keys[2]) 77 | } 78 | 79 | // do we have the right values? 80 | vp, ok := m.Lookup("one") 81 | if !ok { 82 | t.Logf("missing value for one") 83 | t.FailNow() 84 | } 85 | if v := vp.(*int); *v != 1 { 86 | t.Errorf("wrong value: %d\n", *v) 87 | } 88 | vp, ok = m.Lookup("two") 89 | if !ok { 90 | t.Logf("missing value for two") 91 | t.FailNow() 92 | } 93 | if v := vp.(*int); *v != 2 { 94 | t.Errorf("wrong value: %d\n", *v) 95 | } 96 | vp, ok = m.Lookup("three") 97 | if !ok { 98 | t.Logf("missing value for three") 99 | t.FailNow() 100 | } 101 | if v := vp.(*int); *v != 3 { 102 | t.Errorf("wrong value: %d\n", *v) 103 | } 104 | } 105 | 106 | func TestMapManyKeys(t *testing.T) { 107 | // build a map with many keys and values 108 | count := 100 109 | m := NewMap() 110 | for i := 0; i < count; i++ { 111 | m = m.Set(Itoa(i), i) 112 | } 113 | 114 | if m.Size() != 100 { 115 | t.Errorf("Wrong number of keys: %d", m.Size()) 116 | } 117 | 118 | m = m.Delete("42").Delete("7").Delete("19").Delete("99") 119 | if m.Size() != 96 { 120 | t.Errorf("Wrong number of keys: %d", m.Size()) 121 | } 122 | 123 | for i := 43; i < 99; i++ { 124 | v, ok := m.Lookup(Itoa(i)) 125 | if !ok || v != i { 126 | t.Errorf("Wrong value for key %d", i) 127 | } 128 | } 129 | } 130 | 131 | func TestMapUnsafeMutableSet(t *testing.T) { 132 | // build a map with many keys and values 133 | count := 100 134 | m := NewMap() 135 | for i := 0; i < count; i++ { 136 | m = m.UnsafeMutableSet(Itoa(i), i) 137 | } 138 | 139 | if m.Size() != 100 { 140 | t.Errorf("Wrong number of keys: %d", m.Size()) 141 | } 142 | 143 | m = m.Delete("42").Delete("7").Delete("19").Delete("99") 144 | if m.Size() != 96 { 145 | t.Errorf("Wrong number of keys: %d", m.Size()) 146 | } 147 | 148 | for i := 43; i < 99; i++ { 149 | v, ok := m.Lookup(Itoa(i)) 150 | if !ok || v != i { 151 | t.Errorf("Wrong value for key %d", i) 152 | } 153 | } 154 | } 155 | 156 | func TestMapHashKey(t *testing.T) { 157 | hash := hashKey("this is a key") 158 | if hash != 10424450902216330915 { 159 | t.Errorf("This isn't FNV-1a hashing: %d", hash) 160 | } 161 | } 162 | 163 | func BenchmarkMapSet(b *testing.B) { 164 | m := NewMap() 165 | for i := 0; i < b.N; i++ { 166 | m = m.Set("foo", i) 167 | } 168 | } 169 | 170 | func BenchmarkMapUnsafeMutableSet(b *testing.B) { 171 | m := NewMap() 172 | for i := 0; i < b.N; i++ { 173 | m = m.UnsafeMutableSet("foo", i) 174 | } 175 | } 176 | 177 | func BenchmarkMapDelete(b *testing.B) { 178 | m := NewMap().Set("key", "value") 179 | for i := 0; i < b.N; i++ { 180 | m.Delete("key") 181 | } 182 | } 183 | 184 | func BenchmarkHashKey(b *testing.B) { 185 | key := "this is a key" 186 | for i := 0; i < b.N; i++ { 187 | _ = hashKey(key) 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /profile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | go test -c 3 | ./ps.test -test.run=none -test.bench=$2 -test.$1profile=$1.profile 4 | --------------------------------------------------------------------------------