├── LICENSE ├── Makefile ├── README.md ├── chord.go ├── chord_test.go ├── iter_closest.go ├── iter_closest_test.go ├── net.go ├── net_test.go ├── ring.go ├── ring_test.go ├── transport.go ├── transport_test.go ├── util.go ├── util_test.go ├── vnode.go └── vnode_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Armon Dadgar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | go build 4 | 5 | test: 6 | go test . 
7 | 8 | cov: 9 | gocov test github.com/armon/go-chord | gocov-html > /tmp/coverage.html 10 | open /tmp/coverage.html 11 | 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go Chord 2 | 3 | This package provides a Golang implementation of the Chord protocol. 4 | Chord is used to organize nodes along a ring in a consistent way. It can be 5 | used to distribute work, build a key/value store, or serve as the underlying 6 | organization for a ring overlay topology. 7 | 8 | The protocol is separated from the implementation of an underlying network 9 | transport or RPC mechanism. Instead Chord relies on a transport implementation. 10 | A TCPTransport is provided that can be used as a reliable Chord RPC mechanism. 11 | 12 | # Documentation 13 | 14 | To view the online documentation, go [here](http://godoc.org/github.com/armon/go-chord). 15 | 16 | -------------------------------------------------------------------------------- /chord.go: -------------------------------------------------------------------------------- 1 | /* 2 | This package is used to provide an implementation of the 3 | Chord network protocol. 4 | */ 5 | package chord 6 | 7 | import ( 8 | "crypto/sha1" 9 | "fmt" 10 | "hash" 11 | "time" 12 | ) 13 | 14 | // Implements the methods needed for a Chord ring 15 | type Transport interface { 16 | // Gets a list of the vnodes on the box 17 | ListVnodes(string) ([]*Vnode, error) 18 | 19 | // Ping a Vnode, check for liveness 20 | Ping(*Vnode) (bool, error) 21 | 22 | // Request a nodes predecessor 23 | GetPredecessor(*Vnode) (*Vnode, error) 24 | 25 | // Notify our successor of ourselves 26 | Notify(target, self *Vnode) ([]*Vnode, error) 27 | 28 | // Find a successor 29 | FindSuccessors(*Vnode, int, []byte) ([]*Vnode, error) 30 | 31 | // Clears a predecessor if it matches a given vnode. Used to leave. 
// VnodeRPC lists the methods invoked on a registered local vnode. Each one
// has a counterpart of the same name on Transport, which additionally takes
// the target vnode as its first argument.
type VnodeRPC interface {
	GetPredecessor() (*Vnode, error)
	Notify(*Vnode) ([]*Vnode, error)
	FindSuccessors(int, []byte) ([]*Vnode, error)
	ClearPredecessor(*Vnode) error
	SkipSuccessor(*Vnode) error
}

// Delegate receives notifications about ring events. It is optional:
// Config.Delegate may be nil.
// NOTE(review): the call sites are outside this view, so the exact moment
// each callback fires and the role of each argument are only suggested by
// the method and parameter names; confirm against the callers.
type Delegate interface {
	NewPredecessor(local, remoteNew, remotePrev *Vnode)
	Leaving(local, pred, succ *Vnode)
	PredecessorLeaving(local, remote *Vnode)
	SuccessorLeaving(local, remote *Vnode)
	Shutdown()
}

// Config holds the configuration for Chord nodes.
type Config struct {
	Hostname      string           // Local host name
	NumVnodes     int              // Number of vnodes per physical node
	HashFunc      func() hash.Hash // Hash function to use
	StabilizeMin  time.Duration    // Minimum stabilization time
	StabilizeMax  time.Duration    // Maximum stabilization time
	NumSuccessors int              // Number of successors to maintain
	Delegate      Delegate         // Invoked to handle ring events
	hashBits      int              // Bit size of the hash function; Create and Join set it to HashFunc().Size() * 8
}

// Vnode represents a virtual node, local or remote.
type Vnode struct {
	Id   []byte // Virtual ID
	Host string // Host identifier
}

// localVnode represents a Vnode that lives in this process, together with
// the routing state kept for it.
type localVnode struct {
	Vnode
	ring        *Ring     // Ring this vnode belongs to
	successors  []*Vnode  // Successor list; entries may be nil
	finger      []*Vnode  // Finger table; entries may be nil
	last_finger int       // NOTE(review): presumably the index of the finger fixed most recently; confirm in vnode.go
	predecessor *Vnode    // Current predecessor
	stabilized  time.Time // NOTE(review): presumably the time of the last stabilization; confirm in vnode.go
	timer       *time.Timer
}

// Ring stores the state required for a Chord ring.
type Ring struct {
	config     *Config
	transport  Transport
	vnodes     []*localVnode // Vnodes hosted by this process
	delegateCh chan func()   // NOTE(review): presumably queues Delegate callbacks for delegateHandler; confirm in ring.go
	shutdown   chan bool
}
configuration 99 | func DefaultConfig(hostname string) *Config { 100 | return &Config{ 101 | hostname, 102 | 8, // 8 vnodes 103 | sha1.New, // SHA1 104 | time.Duration(15 * time.Second), 105 | time.Duration(45 * time.Second), 106 | 8, // 8 successors 107 | nil, // No delegate 108 | 160, // 160bit hash function 109 | } 110 | } 111 | 112 | // Creates a new Chord ring given the config and transport 113 | func Create(conf *Config, trans Transport) (*Ring, error) { 114 | // Initialize the hash bits 115 | conf.hashBits = conf.HashFunc().Size() * 8 116 | 117 | // Create and initialize a ring 118 | ring := &Ring{} 119 | ring.init(conf, trans) 120 | ring.setLocalSuccessors() 121 | ring.schedule() 122 | return ring, nil 123 | } 124 | 125 | // Joins an existing Chord ring 126 | func Join(conf *Config, trans Transport, existing string) (*Ring, error) { 127 | // Initialize the hash bits 128 | conf.hashBits = conf.HashFunc().Size() * 8 129 | 130 | // Request a list of Vnodes from the remote host 131 | hosts, err := trans.ListVnodes(existing) 132 | if err != nil { 133 | return nil, err 134 | } 135 | if hosts == nil || len(hosts) == 0 { 136 | return nil, fmt.Errorf("Remote host has no vnodes!") 137 | } 138 | 139 | // Create a ring 140 | ring := &Ring{} 141 | ring.init(conf, trans) 142 | 143 | // Acquire a live successor for each Vnode 144 | for _, vn := range ring.vnodes { 145 | // Get the nearest remote vnode 146 | nearest := nearestVnodeToKey(hosts, vn.Id) 147 | 148 | // Query for a list of successors to this Vnode 149 | succs, err := trans.FindSuccessors(nearest, conf.NumSuccessors, vn.Id) 150 | if err != nil { 151 | return nil, fmt.Errorf("Failed to find successor for vnodes! Got %s", err) 152 | } 153 | if succs == nil || len(succs) == 0 { 154 | return nil, fmt.Errorf("Failed to find successor for vnodes! 
Got no vnodes!") 155 | } 156 | 157 | // Assign the successors 158 | for idx, s := range succs { 159 | vn.successors[idx] = s 160 | } 161 | } 162 | 163 | // Start delegate handler 164 | if ring.config.Delegate != nil { 165 | go ring.delegateHandler() 166 | } 167 | 168 | // Do a fast stabilization, will schedule regular execution 169 | for _, vn := range ring.vnodes { 170 | vn.stabilize() 171 | } 172 | return ring, nil 173 | } 174 | 175 | // Leaves a given Chord ring and shuts down the local vnodes 176 | func (r *Ring) Leave() error { 177 | // Shutdown the vnodes first to avoid further stabilization runs 178 | r.stopVnodes() 179 | 180 | // Instruct each vnode to leave 181 | var err error 182 | for _, vn := range r.vnodes { 183 | err = mergeErrors(err, vn.leave()) 184 | } 185 | 186 | // Wait for the delegate callbacks to complete 187 | r.stopDelegate() 188 | return err 189 | } 190 | 191 | // Shutdown shuts down the local processes in a given Chord ring 192 | // Blocks until all the vnodes terminate. 
193 | func (r *Ring) Shutdown() { 194 | r.stopVnodes() 195 | r.stopDelegate() 196 | } 197 | 198 | // Does a key lookup for up to N successors of a key 199 | func (r *Ring) Lookup(n int, key []byte) ([]*Vnode, error) { 200 | // Ensure that n is sane 201 | if n > r.config.NumSuccessors { 202 | return nil, fmt.Errorf("Cannot ask for more successors than NumSuccessors!") 203 | } 204 | 205 | // Hash the key 206 | h := r.config.HashFunc() 207 | h.Write(key) 208 | key_hash := h.Sum(nil) 209 | 210 | // Find the nearest local vnode 211 | nearest := r.nearestVnode(key_hash) 212 | 213 | // Use the nearest node for the lookup 214 | successors, err := nearest.FindSuccessors(n, key_hash) 215 | if err != nil { 216 | return nil, err 217 | } 218 | 219 | // Trim the nil successors 220 | for successors[len(successors)-1] == nil { 221 | successors = successors[:len(successors)-1] 222 | } 223 | return successors, nil 224 | } 225 | -------------------------------------------------------------------------------- /chord_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "runtime" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | type MultiLocalTrans struct { 10 | remote Transport 11 | hosts map[string]*LocalTransport 12 | } 13 | 14 | func InitMLTransport() *MultiLocalTrans { 15 | hosts := make(map[string]*LocalTransport) 16 | remote := &BlackholeTransport{} 17 | ml := &MultiLocalTrans{hosts: hosts} 18 | ml.remote = remote 19 | return ml 20 | } 21 | 22 | func (ml *MultiLocalTrans) ListVnodes(host string) ([]*Vnode, error) { 23 | if local, ok := ml.hosts[host]; ok { 24 | return local.ListVnodes(host) 25 | } 26 | return ml.remote.ListVnodes(host) 27 | } 28 | 29 | // Ping a Vnode, check for liveness 30 | func (ml *MultiLocalTrans) Ping(v *Vnode) (bool, error) { 31 | if local, ok := ml.hosts[v.Host]; ok { 32 | return local.Ping(v) 33 | } 34 | return ml.remote.Ping(v) 35 | } 36 | 37 | // Request a nodes predecessor 38 | func (ml 
*MultiLocalTrans) GetPredecessor(v *Vnode) (*Vnode, error) { 39 | if local, ok := ml.hosts[v.Host]; ok { 40 | return local.GetPredecessor(v) 41 | } 42 | return ml.remote.GetPredecessor(v) 43 | } 44 | 45 | // Notify our successor of ourselves 46 | func (ml *MultiLocalTrans) Notify(target, self *Vnode) ([]*Vnode, error) { 47 | if local, ok := ml.hosts[target.Host]; ok { 48 | return local.Notify(target, self) 49 | } 50 | return ml.remote.Notify(target, self) 51 | } 52 | 53 | // Find a successor 54 | func (ml *MultiLocalTrans) FindSuccessors(v *Vnode, n int, k []byte) ([]*Vnode, error) { 55 | if local, ok := ml.hosts[v.Host]; ok { 56 | return local.FindSuccessors(v, n, k) 57 | } 58 | return ml.remote.FindSuccessors(v, n, k) 59 | } 60 | 61 | // Clears a predecessor if it matches a given vnode. Used to leave. 62 | func (ml *MultiLocalTrans) ClearPredecessor(target, self *Vnode) error { 63 | if local, ok := ml.hosts[target.Host]; ok { 64 | return local.ClearPredecessor(target, self) 65 | } 66 | return ml.remote.ClearPredecessor(target, self) 67 | } 68 | 69 | // Instructs a node to skip a given successor. Used to leave. 
70 | func (ml *MultiLocalTrans) SkipSuccessor(target, self *Vnode) error { 71 | if local, ok := ml.hosts[target.Host]; ok { 72 | return local.SkipSuccessor(target, self) 73 | } 74 | return ml.remote.SkipSuccessor(target, self) 75 | } 76 | 77 | func (ml *MultiLocalTrans) Register(v *Vnode, o VnodeRPC) { 78 | local, ok := ml.hosts[v.Host] 79 | if !ok { 80 | local = InitLocalTransport(nil).(*LocalTransport) 81 | ml.hosts[v.Host] = local 82 | } 83 | local.Register(v, o) 84 | } 85 | 86 | func (ml *MultiLocalTrans) Deregister(host string) { 87 | delete(ml.hosts, host) 88 | } 89 | 90 | func TestDefaultConfig(t *testing.T) { 91 | conf := DefaultConfig("test") 92 | if conf.Hostname != "test" { 93 | t.Fatalf("bad hostname") 94 | } 95 | if conf.NumVnodes != 8 { 96 | t.Fatalf("bad num vnodes") 97 | } 98 | if conf.NumSuccessors != 8 { 99 | t.Fatalf("bad num succ") 100 | } 101 | if conf.HashFunc == nil { 102 | t.Fatalf("bad hash") 103 | } 104 | if conf.hashBits != 160 { 105 | t.Fatalf("bad hash bits") 106 | } 107 | if conf.StabilizeMin != time.Duration(15*time.Second) { 108 | t.Fatalf("bad min stable") 109 | } 110 | if conf.StabilizeMax != time.Duration(45*time.Second) { 111 | t.Fatalf("bad max stable") 112 | } 113 | if conf.Delegate != nil { 114 | t.Fatalf("bad delegate") 115 | } 116 | } 117 | 118 | func fastConf() *Config { 119 | conf := DefaultConfig("test") 120 | conf.StabilizeMin = time.Duration(15 * time.Millisecond) 121 | conf.StabilizeMax = time.Duration(45 * time.Millisecond) 122 | return conf 123 | } 124 | 125 | func TestCreateShutdown(t *testing.T) { 126 | // Start the timer thread 127 | time.After(15) 128 | conf := fastConf() 129 | numGo := runtime.NumGoroutine() 130 | r, err := Create(conf, nil) 131 | if err != nil { 132 | t.Fatalf("unexpected err. %s", err) 133 | } 134 | r.Shutdown() 135 | after := runtime.NumGoroutine() 136 | if after != numGo { 137 | t.Fatalf("unexpected routines! 
A:%d B:%d", after, numGo) 138 | } 139 | } 140 | 141 | func TestJoin(t *testing.T) { 142 | // Create a multi transport 143 | ml := InitMLTransport() 144 | 145 | // Create the initial ring 146 | conf := fastConf() 147 | r, err := Create(conf, ml) 148 | if err != nil { 149 | t.Fatalf("unexpected err. %s", err) 150 | } 151 | 152 | // Create a second ring 153 | conf2 := fastConf() 154 | conf2.Hostname = "test2" 155 | r2, err := Join(conf2, ml, "test") 156 | if err != nil { 157 | t.Fatalf("failed to join local node! Got %s", err) 158 | } 159 | 160 | // Shutdown 161 | r.Shutdown() 162 | r2.Shutdown() 163 | } 164 | 165 | func TestJoinDeadHost(t *testing.T) { 166 | // Create a multi transport 167 | ml := InitMLTransport() 168 | 169 | // Create the initial ring 170 | conf := fastConf() 171 | _, err := Join(conf, ml, "noop") 172 | if err == nil { 173 | t.Fatalf("expected err!") 174 | } 175 | } 176 | 177 | func TestLeave(t *testing.T) { 178 | // Create a multi transport 179 | ml := InitMLTransport() 180 | 181 | // Create the initial ring 182 | conf := fastConf() 183 | r, err := Create(conf, ml) 184 | if err != nil { 185 | t.Fatalf("unexpected err. %s", err) 186 | } 187 | 188 | // Create a second ring 189 | conf2 := fastConf() 190 | conf2.Hostname = "test2" 191 | r2, err := Join(conf2, ml, "test") 192 | if err != nil { 193 | t.Fatalf("failed to join local node! Got %s", err) 194 | } 195 | 196 | // Wait for some stabilization 197 | <-time.After(100 * time.Millisecond) 198 | 199 | // Node 1 should leave 200 | r.Leave() 201 | ml.Deregister("test") 202 | 203 | // Wait for stabilization 204 | <-time.After(100 * time.Millisecond) 205 | 206 | // Verify r2 ring is still in tact 207 | num := len(r2.vnodes) 208 | for idx, vn := range r2.vnodes { 209 | if vn.successors[0] != &r2.vnodes[(idx+1)%num].Vnode { 210 | t.Fatalf("bad successor! 
Got:%s:%s", vn.successors[0].Host, 211 | vn.successors[0]) 212 | } 213 | } 214 | } 215 | 216 | func TestLookupBadN(t *testing.T) { 217 | // Create a multi transport 218 | ml := InitMLTransport() 219 | 220 | // Create the initial ring 221 | conf := fastConf() 222 | r, err := Create(conf, ml) 223 | if err != nil { 224 | t.Fatalf("unexpected err. %s", err) 225 | } 226 | 227 | _, err = r.Lookup(10, []byte("test")) 228 | if err == nil { 229 | t.Fatalf("expected err!") 230 | } 231 | } 232 | 233 | func TestLookup(t *testing.T) { 234 | // Create a multi transport 235 | ml := InitMLTransport() 236 | 237 | // Create the initial ring 238 | conf := fastConf() 239 | r, err := Create(conf, ml) 240 | if err != nil { 241 | t.Fatalf("unexpected err. %s", err) 242 | } 243 | 244 | // Create a second ring 245 | conf2 := fastConf() 246 | conf2.Hostname = "test2" 247 | r2, err := Join(conf2, ml, "test") 248 | if err != nil { 249 | t.Fatalf("failed to join local node! Got %s", err) 250 | } 251 | 252 | // Wait for some stabilization 253 | <-time.After(100 * time.Millisecond) 254 | 255 | // Try key lookup 256 | keys := [][]byte{[]byte("test"), []byte("foo"), []byte("bar")} 257 | for _, k := range keys { 258 | vn1, err := r.Lookup(3, k) 259 | if err != nil { 260 | t.Fatalf("unexpected err %s", err) 261 | } 262 | vn2, err := r2.Lookup(3, k) 263 | if err != nil { 264 | t.Fatalf("unexpected err %s", err) 265 | } 266 | if len(vn1) != len(vn2) { 267 | t.Fatalf("result len differs!") 268 | } 269 | for idx := range vn1 { 270 | if vn1[idx].String() != vn2[idx].String() { 271 | t.Fatalf("results differ!") 272 | } 273 | } 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /iter_closest.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "math/big" 5 | ) 6 | 7 | type closestPreceedingVnodeIterator struct { 8 | key []byte 9 | vn *localVnode 10 | finger_idx int 11 | successor_idx 
// Next returns the next candidate vnode for the iterator's key, or nil once
// both the successor list and the finger table are exhausted.
//
// Both tables are scanned from their current cursor down to index 0. Entries
// that are nil, that were already returned (tracked in cp.yielded by their
// String() form), or that fail the between check are skipped. When both
// tables offer a candidate, closest_preceeding_vnode picks the one to return.
// The returned vnode is recorded in cp.yielded and the cursor of the table it
// came from is moved past it.
func (cp *closestPreceedingVnodeIterator) Next() *Vnode {
	// Try to find each node
	var successor_node *Vnode
	var finger_node *Vnode

	// Scan to find the next successor
	vn := cp.vn
	var i int
	for i = cp.successor_idx; i >= 0; i-- {
		if vn.successors[i] == nil {
			continue
		}
		if _, ok := cp.yielded[vn.successors[i].String()]; ok {
			continue
		}
		// NOTE(review): between is defined outside this view; presumably it
		// tests whether the candidate's Id lies on the ring between vn.Id and
		// the key. Confirm in util.go.
		if between(vn.Id, cp.key, vn.successors[i].Id) {
			successor_node = vn.successors[i]
			break
		}
	}
	// Remember where the scan stopped; i is -1 when nothing was found, which
	// makes later calls skip this table.
	cp.successor_idx = i

	// Scan to find the next finger
	for i = cp.finger_idx; i >= 0; i-- {
		if vn.finger[i] == nil {
			continue
		}
		if _, ok := cp.yielded[vn.finger[i].String()]; ok {
			continue
		}
		if between(vn.Id, cp.key, vn.finger[i].Id) {
			finger_node = vn.finger[i]
			break
		}
	}
	cp.finger_idx = i

	// Determine which node is better
	if successor_node != nil && finger_node != nil {
		// Determine the closer node
		hb := cp.vn.ring.config.hashBits
		closest := closest_preceeding_vnode(successor_node,
			finger_node, cp.key, hb)
		// Only the table that supplied the winner advances; the loser stays
		// under its cursor and is reconsidered on the next call.
		if closest == successor_node {
			cp.successor_idx--
		} else {
			cp.finger_idx--
		}
		cp.yielded[closest.String()] = struct{}{}
		return closest

	} else if successor_node != nil {
		cp.successor_idx--
		cp.yielded[successor_node.String()] = struct{}{}
		return successor_node

	} else if finger_node != nil {
		cp.finger_idx--
		cp.yielded[finger_node.String()] = struct{}{}
		return finger_node
	}

	return nil
}
// distance returns the forward (clockwise) distance from a to b on a ring
// holding 2^bits identifiers.
//
// a and b are interpreted as big-endian unsigned integers. The result is
// (b - a) mod 2^bits and is therefore never negative.
func distance(a, b []byte, bits int) *big.Int {
	from := new(big.Int).SetBytes(a)
	to := new(big.Int).SetBytes(b)

	// Size of the identifier space: 2^bits.
	modulus := new(big.Int).Exp(big.NewInt(2), big.NewInt(int64(bits)), nil)

	// Mod is Euclidean, so a negative difference wraps around the ring.
	delta := new(big.Int).Sub(to, from)
	return delta.Mod(delta, modulus)
}
%v", s2) 40 | } 41 | 42 | s3 := cp.Next() 43 | if s3 != v7 { 44 | t.Fatalf("Expect v7. %v", s3) 45 | } 46 | 47 | s4 := cp.Next() 48 | if s4 != v6 { 49 | t.Fatalf("Expect v6. %v", s4) 50 | } 51 | 52 | s5 := cp.Next() 53 | if s5 != nil { 54 | t.Fatalf("Expect nil. %v", s5) 55 | } 56 | } 57 | 58 | func TestNextClosestNoSucc(t *testing.T) { 59 | // Make the vnodes on the ring (mod 64) 60 | v1 := &Vnode{Id: []byte{1}} 61 | v2 := &Vnode{Id: []byte{10}} 62 | //v3 := &Vnode{Id: []byte{20}} 63 | v4 := &Vnode{Id: []byte{32}} 64 | //v5 := &Vnode{Id: []byte{40}} 65 | v6 := &Vnode{Id: []byte{59}} 66 | v7 := &Vnode{Id: []byte{62}} 67 | 68 | // Make a vnode 69 | vn := &localVnode{} 70 | vn.Id = []byte{54} 71 | vn.successors = []*Vnode{nil} 72 | vn.finger = []*Vnode{v6, v6, v7, v1, v2, v4, nil} 73 | vn.ring = &Ring{} 74 | vn.ring.config = &Config{hashBits: 6} 75 | 76 | // Make an iterator 77 | k := []byte{32} 78 | cp := &closestPreceedingVnodeIterator{} 79 | cp.init(vn, k) 80 | 81 | // Iterate until we are done 82 | s1 := cp.Next() 83 | if s1 != v2 { 84 | t.Fatalf("Expect v2. %v", s1) 85 | } 86 | 87 | s2 := cp.Next() 88 | if s2 != v1 { 89 | t.Fatalf("Expect v1. %v", s2) 90 | } 91 | 92 | s3 := cp.Next() 93 | if s3 != v7 { 94 | t.Fatalf("Expect v7. %v", s3) 95 | } 96 | 97 | s4 := cp.Next() 98 | if s4 != v6 { 99 | t.Fatalf("Expect v6. %v", s4) 100 | } 101 | 102 | s5 := cp.Next() 103 | if s5 != nil { 104 | t.Fatalf("Expect nil. 
%v", s5) 105 | } 106 | } 107 | 108 | func TestNextClosestNoFinger(t *testing.T) { 109 | // Make the vnodes on the ring (mod 64) 110 | //v1 := &Vnode{Id: []byte{1}} 111 | //v2 := &Vnode{Id: []byte{10}} 112 | //v3 := &Vnode{Id: []byte{20}} 113 | //v4 := &Vnode{Id: []byte{32}} 114 | //v5 := &Vnode{Id: []byte{40}} 115 | v6 := &Vnode{Id: []byte{59}} 116 | v7 := &Vnode{Id: []byte{62}} 117 | 118 | // Make a vnode 119 | vn := &localVnode{} 120 | vn.Id = []byte{54} 121 | vn.successors = []*Vnode{v6, v7, v7, nil} 122 | vn.finger = []*Vnode{nil, nil, nil} 123 | vn.ring = &Ring{} 124 | vn.ring.config = &Config{hashBits: 6} 125 | 126 | // Make an iterator 127 | k := []byte{32} 128 | cp := &closestPreceedingVnodeIterator{} 129 | cp.init(vn, k) 130 | 131 | // Iterate until we are done 132 | s3 := cp.Next() 133 | if s3 != v7 { 134 | t.Fatalf("Expect v7. %v", s3) 135 | } 136 | 137 | s4 := cp.Next() 138 | if s4 != v6 { 139 | t.Fatalf("Expect v6. %v", s4) 140 | } 141 | 142 | s5 := cp.Next() 143 | if s5 != nil { 144 | t.Fatalf("Expect nil. %v", s5) 145 | } 146 | } 147 | 148 | func TestClosest(t *testing.T) { 149 | a := &Vnode{Id: []byte{128}} 150 | b := &Vnode{Id: []byte{32}} 151 | k := []byte{45} 152 | c := closest_preceeding_vnode(a, b, k, 8) 153 | if c != b { 154 | t.Fatalf("expect b to be closer!") 155 | } 156 | c = closest_preceeding_vnode(b, a, k, 8) 157 | if c != b { 158 | t.Fatalf("expect b to be closer!") 159 | } 160 | } 161 | 162 | func TestDistance(t *testing.T) { 163 | a := []byte{63} 164 | b := []byte{3} 165 | d := distance(a, b, 6) // Ring size of 64 166 | if d.Cmp(big.NewInt(4)) != 0 { 167 | t.Fatalf("expect distance 4! %v", d) 168 | } 169 | 170 | a = []byte{0} 171 | b = []byte{65} 172 | d = distance(a, b, 7) // Ring size of 128 173 | if d.Cmp(big.NewInt(65)) != 0 { 174 | t.Fatalf("expect distance 65! 
%v", d) 175 | } 176 | 177 | a = []byte{1} 178 | b = []byte{255} 179 | d = distance(a, b, 8) // Ring size of 256 180 | if d.Cmp(big.NewInt(254)) != 0 { 181 | t.Fatalf("expect distance 254! %v", d) 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /net.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "encoding/gob" 5 | "fmt" 6 | "log" 7 | "net" 8 | "sync" 9 | "sync/atomic" 10 | "time" 11 | ) 12 | 13 | /* 14 | TCPTransport provides a TCP based Chord transport layer. This allows Chord 15 | to be implemented over a network, instead of only using the LocalTransport. It is 16 | meant to be a simple implementation, optimizing for simplicity instead of performance. 17 | Messages are sent with a header frame, followed by a body frame. All data is encoded 18 | using the GOB format for simplicity. 19 | 20 | Internally, there is 1 Goroutine listening for inbound connections, 1 Goroutine PER 21 | inbound connection. 
22 | */ 23 | type TCPTransport struct { 24 | sock *net.TCPListener 25 | timeout time.Duration 26 | maxIdle time.Duration 27 | lock sync.RWMutex 28 | local map[string]*localRPC 29 | inbound map[*net.TCPConn]struct{} 30 | poolLock sync.Mutex 31 | pool map[string][]*tcpOutConn 32 | shutdown int32 33 | } 34 | 35 | type tcpOutConn struct { 36 | host string 37 | sock *net.TCPConn 38 | header tcpHeader 39 | enc *gob.Encoder 40 | dec *gob.Decoder 41 | used time.Time 42 | } 43 | 44 | const ( 45 | tcpPing = iota 46 | tcpListReq 47 | tcpGetPredReq 48 | tcpNotifyReq 49 | tcpFindSucReq 50 | tcpClearPredReq 51 | tcpSkipSucReq 52 | ) 53 | 54 | type tcpHeader struct { 55 | ReqType int 56 | } 57 | 58 | // Potential body types 59 | type tcpBodyError struct { 60 | Err error 61 | } 62 | type tcpBodyString struct { 63 | S string 64 | } 65 | type tcpBodyVnode struct { 66 | Vn *Vnode 67 | } 68 | type tcpBodyTwoVnode struct { 69 | Target *Vnode 70 | Vn *Vnode 71 | } 72 | type tcpBodyFindSuc struct { 73 | Target *Vnode 74 | Num int 75 | Key []byte 76 | } 77 | type tcpBodyVnodeError struct { 78 | Vnode *Vnode 79 | Err error 80 | } 81 | type tcpBodyVnodeListError struct { 82 | Vnodes []*Vnode 83 | Err error 84 | } 85 | type tcpBodyBoolError struct { 86 | B bool 87 | Err error 88 | } 89 | 90 | // Creates a new TCP transport on the given listen address with the 91 | // configured timeout duration. 
92 | func InitTCPTransport(listen string, timeout time.Duration) (*TCPTransport, error) { 93 | // Try to start the listener 94 | sock, err := net.Listen("tcp", listen) 95 | if err != nil { 96 | return nil, err 97 | } 98 | 99 | // allocate maps 100 | local := make(map[string]*localRPC) 101 | inbound := make(map[*net.TCPConn]struct{}) 102 | pool := make(map[string][]*tcpOutConn) 103 | 104 | // Maximum age of a connection 105 | maxIdle := time.Duration(300 * time.Second) 106 | 107 | // Setup the transport 108 | tcp := &TCPTransport{sock: sock.(*net.TCPListener), 109 | timeout: timeout, 110 | maxIdle: maxIdle, 111 | local: local, 112 | inbound: inbound, 113 | pool: pool} 114 | 115 | // Listen for connections 116 | go tcp.listen() 117 | 118 | // Reap old connections 119 | go tcp.reapOld() 120 | 121 | // Done 122 | return tcp, nil 123 | } 124 | 125 | // Checks for a local vnode 126 | func (t *TCPTransport) get(vn *Vnode) (VnodeRPC, bool) { 127 | key := vn.String() 128 | t.lock.RLock() 129 | defer t.lock.RUnlock() 130 | w, ok := t.local[key] 131 | if ok { 132 | return w.obj, ok 133 | } else { 134 | return nil, ok 135 | } 136 | } 137 | 138 | // Gets an outbound connection to a host 139 | func (t *TCPTransport) getConn(host string) (*tcpOutConn, error) { 140 | // Check if we have a conn cached 141 | var out *tcpOutConn 142 | t.poolLock.Lock() 143 | if atomic.LoadInt32(&t.shutdown) == 1 { 144 | t.poolLock.Unlock() 145 | return nil, fmt.Errorf("TCP transport is shutdown") 146 | } 147 | list, ok := t.pool[host] 148 | if ok && len(list) > 0 { 149 | out = list[len(list)-1] 150 | list = list[:len(list)-1] 151 | t.pool[host] = list 152 | } 153 | t.poolLock.Unlock() 154 | if out != nil { 155 | // Verify that the socket is valid. Might be closed. 
156 | if _, err := out.sock.Read(nil); err == nil { 157 | return out, nil 158 | } 159 | out.sock.Close() 160 | } 161 | 162 | // Try to establish a connection 163 | conn, err := net.DialTimeout("tcp", host, t.timeout) 164 | if err != nil { 165 | return nil, err 166 | } 167 | 168 | // Setup the socket 169 | sock := conn.(*net.TCPConn) 170 | t.setupConn(sock) 171 | enc := gob.NewEncoder(sock) 172 | dec := gob.NewDecoder(sock) 173 | now := time.Now() 174 | 175 | // Wrap the sock 176 | out = &tcpOutConn{host: host, sock: sock, enc: enc, dec: dec, used: now} 177 | return out, nil 178 | } 179 | 180 | // Returns an outbound TCP connection to the pool 181 | func (t *TCPTransport) returnConn(o *tcpOutConn) { 182 | // Update the last used time 183 | o.used = time.Now() 184 | 185 | // Push back into the pool 186 | t.poolLock.Lock() 187 | defer t.poolLock.Unlock() 188 | if atomic.LoadInt32(&t.shutdown) == 1 { 189 | o.sock.Close() 190 | return 191 | } 192 | list, _ := t.pool[o.host] 193 | t.pool[o.host] = append(list, o) 194 | } 195 | 196 | // Setup a connection 197 | func (t *TCPTransport) setupConn(c *net.TCPConn) { 198 | c.SetNoDelay(true) 199 | c.SetKeepAlive(true) 200 | } 201 | 202 | // Gets a list of the vnodes on the box 203 | func (t *TCPTransport) ListVnodes(host string) ([]*Vnode, error) { 204 | // Get a conn 205 | out, err := t.getConn(host) 206 | if err != nil { 207 | return nil, err 208 | } 209 | 210 | // Response channels 211 | respChan := make(chan []*Vnode, 1) 212 | errChan := make(chan error, 1) 213 | 214 | go func() { 215 | // Send a list command 216 | out.header.ReqType = tcpListReq 217 | body := tcpBodyString{S: host} 218 | if err := out.enc.Encode(&out.header); err != nil { 219 | errChan <- err 220 | return 221 | } 222 | if err := out.enc.Encode(&body); err != nil { 223 | errChan <- err 224 | return 225 | } 226 | 227 | // Read in the response 228 | resp := tcpBodyVnodeListError{} 229 | if err := out.dec.Decode(&resp); err != nil { 230 | errChan <- err 231 | 
} 232 | 233 | // Return the connection 234 | t.returnConn(out) 235 | if resp.Err == nil { 236 | respChan <- resp.Vnodes 237 | } else { 238 | errChan <- resp.Err 239 | } 240 | }() 241 | 242 | select { 243 | case <-time.After(t.timeout): 244 | return nil, fmt.Errorf("Command timed out!") 245 | case err := <-errChan: 246 | return nil, err 247 | case res := <-respChan: 248 | return res, nil 249 | } 250 | } 251 | 252 | // Ping a Vnode, check for liveness 253 | func (t *TCPTransport) Ping(vn *Vnode) (bool, error) { 254 | // Get a conn 255 | out, err := t.getConn(vn.Host) 256 | if err != nil { 257 | return false, err 258 | } 259 | 260 | // Response channels 261 | respChan := make(chan bool, 1) 262 | errChan := make(chan error, 1) 263 | 264 | go func() { 265 | // Send a list command 266 | out.header.ReqType = tcpPing 267 | body := tcpBodyVnode{Vn: vn} 268 | if err := out.enc.Encode(&out.header); err != nil { 269 | errChan <- err 270 | return 271 | } 272 | if err := out.enc.Encode(&body); err != nil { 273 | errChan <- err 274 | return 275 | } 276 | 277 | // Read in the response 278 | resp := tcpBodyBoolError{} 279 | if err := out.dec.Decode(&resp); err != nil { 280 | errChan <- err 281 | return 282 | } 283 | 284 | // Return the connection 285 | t.returnConn(out) 286 | if resp.Err == nil { 287 | respChan <- resp.B 288 | } else { 289 | errChan <- resp.Err 290 | } 291 | }() 292 | 293 | select { 294 | case <-time.After(t.timeout): 295 | return false, fmt.Errorf("Command timed out!") 296 | case err := <-errChan: 297 | return false, err 298 | case res := <-respChan: 299 | return res, nil 300 | } 301 | } 302 | 303 | // Request a nodes predecessor 304 | func (t *TCPTransport) GetPredecessor(vn *Vnode) (*Vnode, error) { 305 | // Get a conn 306 | out, err := t.getConn(vn.Host) 307 | if err != nil { 308 | return nil, err 309 | } 310 | 311 | respChan := make(chan *Vnode, 1) 312 | errChan := make(chan error, 1) 313 | 314 | go func() { 315 | // Send a list command 316 | 
out.header.ReqType = tcpGetPredReq 317 | body := tcpBodyVnode{Vn: vn} 318 | if err := out.enc.Encode(&out.header); err != nil { 319 | errChan <- err 320 | return 321 | } 322 | if err := out.enc.Encode(&body); err != nil { 323 | errChan <- err 324 | return 325 | } 326 | 327 | // Read in the response 328 | resp := tcpBodyVnodeError{} 329 | if err := out.dec.Decode(&resp); err != nil { 330 | errChan <- err 331 | return 332 | } 333 | 334 | // Return the connection 335 | t.returnConn(out) 336 | if resp.Err == nil { 337 | respChan <- resp.Vnode 338 | } else { 339 | errChan <- resp.Err 340 | } 341 | }() 342 | 343 | select { 344 | case <-time.After(t.timeout): 345 | return nil, fmt.Errorf("Command timed out!") 346 | case err := <-errChan: 347 | return nil, err 348 | case res := <-respChan: 349 | return res, nil 350 | } 351 | } 352 | 353 | // Notify our successor of ourselves 354 | func (t *TCPTransport) Notify(target, self *Vnode) ([]*Vnode, error) { 355 | // Get a conn 356 | out, err := t.getConn(target.Host) 357 | if err != nil { 358 | return nil, err 359 | } 360 | 361 | respChan := make(chan []*Vnode, 1) 362 | errChan := make(chan error, 1) 363 | 364 | go func() { 365 | // Send a list command 366 | out.header.ReqType = tcpNotifyReq 367 | body := tcpBodyTwoVnode{Target: target, Vn: self} 368 | if err := out.enc.Encode(&out.header); err != nil { 369 | errChan <- err 370 | return 371 | } 372 | if err := out.enc.Encode(&body); err != nil { 373 | errChan <- err 374 | return 375 | } 376 | 377 | // Read in the response 378 | resp := tcpBodyVnodeListError{} 379 | if err := out.dec.Decode(&resp); err != nil { 380 | errChan <- err 381 | return 382 | } 383 | 384 | // Return the connection 385 | t.returnConn(out) 386 | if resp.Err == nil { 387 | respChan <- resp.Vnodes 388 | } else { 389 | errChan <- resp.Err 390 | } 391 | }() 392 | 393 | select { 394 | case <-time.After(t.timeout): 395 | return nil, fmt.Errorf("Command timed out!") 396 | case err := <-errChan: 397 | return nil, 
err 398 | case res := <-respChan: 399 | return res, nil 400 | } 401 | } 402 | 403 | // Find a successor 404 | func (t *TCPTransport) FindSuccessors(vn *Vnode, n int, k []byte) ([]*Vnode, error) { 405 | // Get a conn 406 | out, err := t.getConn(vn.Host) 407 | if err != nil { 408 | return nil, err 409 | } 410 | 411 | respChan := make(chan []*Vnode, 1) 412 | errChan := make(chan error, 1) 413 | 414 | go func() { 415 | // Send a list command 416 | out.header.ReqType = tcpFindSucReq 417 | body := tcpBodyFindSuc{Target: vn, Num: n, Key: k} 418 | if err := out.enc.Encode(&out.header); err != nil { 419 | errChan <- err 420 | return 421 | } 422 | if err := out.enc.Encode(&body); err != nil { 423 | errChan <- err 424 | return 425 | } 426 | 427 | // Read in the response 428 | resp := tcpBodyVnodeListError{} 429 | if err := out.dec.Decode(&resp); err != nil { 430 | errChan <- err 431 | return 432 | } 433 | 434 | // Return the connection 435 | t.returnConn(out) 436 | if resp.Err == nil { 437 | respChan <- resp.Vnodes 438 | } else { 439 | errChan <- resp.Err 440 | } 441 | }() 442 | 443 | select { 444 | case <-time.After(t.timeout): 445 | return nil, fmt.Errorf("Command timed out!") 446 | case err := <-errChan: 447 | return nil, err 448 | case res := <-respChan: 449 | return res, nil 450 | } 451 | } 452 | 453 | // Clears a predecessor if it matches a given vnode. Used to leave. 
454 | func (t *TCPTransport) ClearPredecessor(target, self *Vnode) error { 455 | // Get a conn 456 | out, err := t.getConn(target.Host) 457 | if err != nil { 458 | return err 459 | } 460 | 461 | respChan := make(chan bool, 1) 462 | errChan := make(chan error, 1) 463 | 464 | go func() { 465 | // Send a list command 466 | out.header.ReqType = tcpClearPredReq 467 | body := tcpBodyTwoVnode{Target: target, Vn: self} 468 | if err := out.enc.Encode(&out.header); err != nil { 469 | errChan <- err 470 | return 471 | } 472 | if err := out.enc.Encode(&body); err != nil { 473 | errChan <- err 474 | return 475 | } 476 | 477 | // Read in the response 478 | resp := tcpBodyError{} 479 | if err := out.dec.Decode(&resp); err != nil { 480 | errChan <- err 481 | return 482 | } 483 | 484 | // Return the connection 485 | t.returnConn(out) 486 | if resp.Err == nil { 487 | respChan <- true 488 | } else { 489 | errChan <- resp.Err 490 | } 491 | }() 492 | 493 | select { 494 | case <-time.After(t.timeout): 495 | return fmt.Errorf("Command timed out!") 496 | case err := <-errChan: 497 | return err 498 | case <-respChan: 499 | return nil 500 | } 501 | } 502 | 503 | // Instructs a node to skip a given successor. Used to leave. 
504 | func (t *TCPTransport) SkipSuccessor(target, self *Vnode) error { 505 | // Get a conn 506 | out, err := t.getConn(target.Host) 507 | if err != nil { 508 | return err 509 | } 510 | 511 | respChan := make(chan bool, 1) 512 | errChan := make(chan error, 1) 513 | 514 | go func() { 515 | // Send a list command 516 | out.header.ReqType = tcpSkipSucReq 517 | body := tcpBodyTwoVnode{Target: target, Vn: self} 518 | if err := out.enc.Encode(&out.header); err != nil { 519 | errChan <- err 520 | return 521 | } 522 | if err := out.enc.Encode(&body); err != nil { 523 | errChan <- err 524 | return 525 | } 526 | 527 | // Read in the response 528 | resp := tcpBodyError{} 529 | if err := out.dec.Decode(&resp); err != nil { 530 | errChan <- err 531 | return 532 | } 533 | 534 | // Return the connection 535 | t.returnConn(out) 536 | if resp.Err == nil { 537 | respChan <- true 538 | } else { 539 | errChan <- resp.Err 540 | } 541 | }() 542 | 543 | select { 544 | case <-time.After(t.timeout): 545 | return fmt.Errorf("Command timed out!") 546 | case err := <-errChan: 547 | return err 548 | case <-respChan: 549 | return nil 550 | } 551 | } 552 | 553 | // Register for an RPC callbacks 554 | func (t *TCPTransport) Register(v *Vnode, o VnodeRPC) { 555 | key := v.String() 556 | t.lock.Lock() 557 | t.local[key] = &localRPC{v, o} 558 | t.lock.Unlock() 559 | } 560 | 561 | // Shutdown the TCP transport 562 | func (t *TCPTransport) Shutdown() { 563 | atomic.StoreInt32(&t.shutdown, 1) 564 | t.sock.Close() 565 | 566 | // Close all the inbound connections 567 | t.lock.RLock() 568 | for conn := range t.inbound { 569 | conn.Close() 570 | } 571 | t.lock.RUnlock() 572 | 573 | // Close all the outbound 574 | t.poolLock.Lock() 575 | for _, conns := range t.pool { 576 | for _, out := range conns { 577 | out.sock.Close() 578 | } 579 | } 580 | t.pool = nil 581 | t.poolLock.Unlock() 582 | } 583 | 584 | // Closes old outbound connections 585 | func (t *TCPTransport) reapOld() { 586 | for { 587 | if 
atomic.LoadInt32(&t.shutdown) == 1 { 588 | return 589 | } 590 | time.Sleep(30 * time.Second) 591 | t.reapOnce() 592 | } 593 | } 594 | 595 | func (t *TCPTransport) reapOnce() { 596 | t.poolLock.Lock() 597 | defer t.poolLock.Unlock() 598 | for host, conns := range t.pool { 599 | max := len(conns) 600 | for i := 0; i < max; i++ { 601 | if time.Since(conns[i].used) > t.maxIdle { 602 | conns[i].sock.Close() 603 | conns[i], conns[max-1] = conns[max-1], nil 604 | max-- 605 | i-- 606 | } 607 | } 608 | // Trim any idle conns 609 | t.pool[host] = conns[:max] 610 | } 611 | } 612 | 613 | // Listens for inbound connections 614 | func (t *TCPTransport) listen() { 615 | for { 616 | conn, err := t.sock.AcceptTCP() 617 | if err != nil { 618 | if atomic.LoadInt32(&t.shutdown) == 0 { 619 | fmt.Printf("[ERR] Error accepting TCP connection! %s", err) 620 | continue 621 | } else { 622 | return 623 | } 624 | } 625 | 626 | // Setup the conn 627 | t.setupConn(conn) 628 | 629 | // Register the inbound conn 630 | t.lock.Lock() 631 | t.inbound[conn] = struct{}{} 632 | t.lock.Unlock() 633 | 634 | // Start handler 635 | go t.handleConn(conn) 636 | } 637 | } 638 | 639 | // Handles inbound TCP connections 640 | func (t *TCPTransport) handleConn(conn *net.TCPConn) { 641 | // Defer the cleanup 642 | defer func() { 643 | t.lock.Lock() 644 | delete(t.inbound, conn) 645 | t.lock.Unlock() 646 | conn.Close() 647 | }() 648 | 649 | dec := gob.NewDecoder(conn) 650 | enc := gob.NewEncoder(conn) 651 | header := tcpHeader{} 652 | var sendResp interface{} 653 | for { 654 | // Get the header 655 | if err := dec.Decode(&header); err != nil { 656 | if atomic.LoadInt32(&t.shutdown) == 0 && err.Error() != "EOF" { 657 | log.Printf("[ERR] Failed to decode TCP header! 
Got %s", err) 658 | } 659 | return 660 | } 661 | 662 | // Read in the body and process request 663 | switch header.ReqType { 664 | case tcpPing: 665 | body := tcpBodyVnode{} 666 | if err := dec.Decode(&body); err != nil { 667 | log.Printf("[ERR] Failed to decode TCP body! Got %s", err) 668 | return 669 | } 670 | 671 | // Generate a response 672 | _, ok := t.get(body.Vn) 673 | if ok { 674 | sendResp = tcpBodyBoolError{B: ok, Err: nil} 675 | } else { 676 | sendResp = tcpBodyBoolError{B: ok, Err: fmt.Errorf("Target VN not found! Target %s:%s", 677 | body.Vn.Host, body.Vn.String())} 678 | } 679 | 680 | case tcpListReq: 681 | body := tcpBodyString{} 682 | if err := dec.Decode(&body); err != nil { 683 | log.Printf("[ERR] Failed to decode TCP body! Got %s", err) 684 | return 685 | } 686 | 687 | // Generate all the local clients 688 | res := make([]*Vnode, 0, len(t.local)) 689 | 690 | // Build list 691 | t.lock.RLock() 692 | for _, v := range t.local { 693 | res = append(res, v.vnode) 694 | } 695 | t.lock.RUnlock() 696 | 697 | // Make response 698 | sendResp = tcpBodyVnodeListError{Vnodes: trimSlice(res)} 699 | 700 | case tcpGetPredReq: 701 | body := tcpBodyVnode{} 702 | if err := dec.Decode(&body); err != nil { 703 | log.Printf("[ERR] Failed to decode TCP body! Got %s", err) 704 | return 705 | } 706 | 707 | // Generate a response 708 | obj, ok := t.get(body.Vn) 709 | resp := tcpBodyVnodeError{} 710 | sendResp = &resp 711 | if ok { 712 | node, err := obj.GetPredecessor() 713 | resp.Vnode = node 714 | resp.Err = err 715 | } else { 716 | resp.Err = fmt.Errorf("Target VN not found! Target %s:%s", 717 | body.Vn.Host, body.Vn.String()) 718 | } 719 | 720 | case tcpNotifyReq: 721 | body := tcpBodyTwoVnode{} 722 | if err := dec.Decode(&body); err != nil { 723 | log.Printf("[ERR] Failed to decode TCP body! 
Got %s", err) 724 | return 725 | } 726 | if body.Target == nil { 727 | return 728 | } 729 | 730 | // Generate a response 731 | obj, ok := t.get(body.Target) 732 | resp := tcpBodyVnodeListError{} 733 | sendResp = &resp 734 | if ok { 735 | nodes, err := obj.Notify(body.Vn) 736 | resp.Vnodes = trimSlice(nodes) 737 | resp.Err = err 738 | } else { 739 | resp.Err = fmt.Errorf("Target VN not found! Target %s:%s", 740 | body.Target.Host, body.Target.String()) 741 | } 742 | 743 | case tcpFindSucReq: 744 | body := tcpBodyFindSuc{} 745 | if err := dec.Decode(&body); err != nil { 746 | log.Printf("[ERR] Failed to decode TCP body! Got %s", err) 747 | return 748 | } 749 | 750 | // Generate a response 751 | obj, ok := t.get(body.Target) 752 | resp := tcpBodyVnodeListError{} 753 | sendResp = &resp 754 | if ok { 755 | nodes, err := obj.FindSuccessors(body.Num, body.Key) 756 | resp.Vnodes = trimSlice(nodes) 757 | resp.Err = err 758 | } else { 759 | resp.Err = fmt.Errorf("Target VN not found! Target %s:%s", 760 | body.Target.Host, body.Target.String()) 761 | } 762 | 763 | case tcpClearPredReq: 764 | body := tcpBodyTwoVnode{} 765 | if err := dec.Decode(&body); err != nil { 766 | log.Printf("[ERR] Failed to decode TCP body! Got %s", err) 767 | return 768 | } 769 | 770 | // Generate a response 771 | obj, ok := t.get(body.Target) 772 | resp := tcpBodyError{} 773 | sendResp = &resp 774 | if ok { 775 | resp.Err = obj.ClearPredecessor(body.Vn) 776 | } else { 777 | resp.Err = fmt.Errorf("Target VN not found! Target %s:%s", 778 | body.Target.Host, body.Target.String()) 779 | } 780 | 781 | case tcpSkipSucReq: 782 | body := tcpBodyTwoVnode{} 783 | if err := dec.Decode(&body); err != nil { 784 | log.Printf("[ERR] Failed to decode TCP body! 
Got %s", err) 785 | return 786 | } 787 | 788 | // Generate a response 789 | obj, ok := t.get(body.Target) 790 | resp := tcpBodyError{} 791 | sendResp = &resp 792 | if ok { 793 | resp.Err = obj.SkipSuccessor(body.Vn) 794 | } else { 795 | resp.Err = fmt.Errorf("Target VN not found! Target %s:%s", 796 | body.Target.Host, body.Target.String()) 797 | } 798 | 799 | default: 800 | log.Printf("[ERR] Unknown request type! Got %d", header.ReqType) 801 | return 802 | } 803 | 804 | // Send the response 805 | if err := enc.Encode(sendResp); err != nil { 806 | log.Printf("[ERR] Failed to send TCP body! Got %s", err) 807 | return 808 | } 809 | } 810 | } 811 | 812 | // Trims the slice to remove nil elements 813 | func trimSlice(vn []*Vnode) []*Vnode { 814 | if vn == nil { 815 | return vn 816 | } 817 | 818 | // Find a non-nil index 819 | idx := len(vn) - 1 820 | for vn[idx] == nil { 821 | idx-- 822 | } 823 | return vn[:idx+1] 824 | } 825 | -------------------------------------------------------------------------------- /net_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func prepRing(port int) (*Config, *TCPTransport, error) { 10 | listen := fmt.Sprintf("localhost:%d", port) 11 | conf := DefaultConfig(listen) 12 | conf.StabilizeMin = time.Duration(15 * time.Millisecond) 13 | conf.StabilizeMax = time.Duration(45 * time.Millisecond) 14 | timeout := time.Duration(20 * time.Millisecond) 15 | trans, err := InitTCPTransport(listen, timeout) 16 | if err != nil { 17 | return nil, nil, err 18 | } 19 | return conf, trans, nil 20 | } 21 | 22 | func TestTCPJoin(t *testing.T) { 23 | // Prepare to create 2 nodes 24 | c1, t1, err := prepRing(10025) 25 | if err != nil { 26 | t.Fatalf("unexpected err. %s", err) 27 | } 28 | c2, t2, err := prepRing(10026) 29 | if err != nil { 30 | t.Fatalf("unexpected err. 
%s", err) 31 | } 32 | 33 | // Create initial ring 34 | r1, err := Create(c1, t1) 35 | if err != nil { 36 | t.Fatalf("unexpected err. %s", err) 37 | } 38 | 39 | // Join ring 40 | r2, err := Join(c2, t2, c1.Hostname) 41 | if err != nil { 42 | t.Fatalf("failed to join local node! Got %s", err) 43 | } 44 | 45 | // Shutdown 46 | r1.Shutdown() 47 | r2.Shutdown() 48 | t1.Shutdown() 49 | t2.Shutdown() 50 | } 51 | 52 | func TestTCPLeave(t *testing.T) { 53 | // Prepare to create 2 nodes 54 | c1, t1, err := prepRing(10027) 55 | if err != nil { 56 | t.Fatalf("unexpected err. %s", err) 57 | } 58 | c2, t2, err := prepRing(10028) 59 | if err != nil { 60 | t.Fatalf("unexpected err. %s", err) 61 | } 62 | 63 | // Create initial ring 64 | r1, err := Create(c1, t1) 65 | if err != nil { 66 | t.Fatalf("unexpected err. %s", err) 67 | } 68 | 69 | // Join ring 70 | r2, err := Join(c2, t2, c1.Hostname) 71 | if err != nil { 72 | t.Fatalf("failed to join local node! Got %s", err) 73 | } 74 | 75 | // Wait for some stabilization 76 | <-time.After(100 * time.Millisecond) 77 | 78 | // Node 1 should leave 79 | r1.Leave() 80 | t1.Shutdown() 81 | 82 | // Wait for stabilization 83 | <-time.After(100 * time.Millisecond) 84 | 85 | // Verify r2 ring is still in tact 86 | for _, vn := range r2.vnodes { 87 | if vn.successors[0].Host != r2.config.Hostname { 88 | t.Fatalf("bad successor! 
Got:%s:%s", vn.successors[0].Host, 89 | vn.successors[0]) 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /ring.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "bytes" 5 | "log" 6 | "sort" 7 | ) 8 | 9 | func (r *Ring) init(conf *Config, trans Transport) { 10 | // Set our variables 11 | r.config = conf 12 | r.vnodes = make([]*localVnode, conf.NumVnodes) 13 | r.transport = InitLocalTransport(trans) 14 | r.delegateCh = make(chan func(), 32) 15 | 16 | // Initializes the vnodes 17 | for i := 0; i < conf.NumVnodes; i++ { 18 | vn := &localVnode{} 19 | r.vnodes[i] = vn 20 | vn.ring = r 21 | vn.init(i) 22 | } 23 | 24 | // Sort the vnodes 25 | sort.Sort(r) 26 | } 27 | 28 | // Len is the number of vnodes 29 | func (r *Ring) Len() int { 30 | return len(r.vnodes) 31 | } 32 | 33 | // Less returns whether the vnode with index i should sort 34 | // before the vnode with index j. 35 | func (r *Ring) Less(i, j int) bool { 36 | return bytes.Compare(r.vnodes[i].Id, r.vnodes[j].Id) == -1 37 | } 38 | 39 | // Swap swaps the vnodes with indexes i and j. 
40 | func (r *Ring) Swap(i, j int) { 41 | r.vnodes[i], r.vnodes[j] = r.vnodes[j], r.vnodes[i] 42 | } 43 | 44 | // Returns the nearest local vnode to the key 45 | func (r *Ring) nearestVnode(key []byte) *localVnode { 46 | for i := len(r.vnodes) - 1; i >= 0; i-- { 47 | if bytes.Compare(r.vnodes[i].Id, key) == -1 { 48 | return r.vnodes[i] 49 | } 50 | } 51 | // Return the last vnode 52 | return r.vnodes[len(r.vnodes)-1] 53 | } 54 | 55 | // Schedules each vnode in the ring 56 | func (r *Ring) schedule() { 57 | if r.config.Delegate != nil { 58 | go r.delegateHandler() 59 | } 60 | for i := 0; i < len(r.vnodes); i++ { 61 | r.vnodes[i].schedule() 62 | } 63 | } 64 | 65 | // Wait for all the vnodes to shutdown 66 | func (r *Ring) stopVnodes() { 67 | r.shutdown = make(chan bool, r.config.NumVnodes) 68 | for i := 0; i < r.config.NumVnodes; i++ { 69 | <-r.shutdown 70 | } 71 | } 72 | 73 | // Stops the delegate handler 74 | func (r *Ring) stopDelegate() { 75 | if r.config.Delegate != nil { 76 | // Wait for all delegate messages to be processed 77 | <-r.invokeDelegate(r.config.Delegate.Shutdown) 78 | close(r.delegateCh) 79 | } 80 | } 81 | 82 | // Initializes the vnodes with their local successors 83 | func (r *Ring) setLocalSuccessors() { 84 | numV := len(r.vnodes) 85 | numSuc := min(r.config.NumSuccessors, numV-1) 86 | for idx, vnode := range r.vnodes { 87 | for i := 0; i < numSuc; i++ { 88 | vnode.successors[i] = &r.vnodes[(idx+i+1)%numV].Vnode 89 | } 90 | } 91 | } 92 | 93 | // Invokes a function on the delegate and returns completion channel 94 | func (r *Ring) invokeDelegate(f func()) chan struct{} { 95 | if r.config.Delegate == nil { 96 | return nil 97 | } 98 | 99 | ch := make(chan struct{}, 1) 100 | wrapper := func() { 101 | defer func() { 102 | ch <- struct{}{} 103 | }() 104 | f() 105 | } 106 | 107 | r.delegateCh <- wrapper 108 | return ch 109 | } 110 | 111 | // This handler runs in a go routine to invoke methods on the delegate 112 | func (r *Ring) delegateHandler() { 113 | 
for { 114 | f, ok := <-r.delegateCh 115 | if !ok { 116 | break 117 | } 118 | r.safeInvoke(f) 119 | } 120 | } 121 | 122 | // Called to safely call a function on the delegate 123 | func (r *Ring) safeInvoke(f func()) { 124 | defer func() { 125 | if r := recover(); r != nil { 126 | log.Printf("Caught a panic invoking a delegate function! Got: %s", r) 127 | } 128 | }() 129 | f() 130 | } 131 | -------------------------------------------------------------------------------- /ring_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha1" 6 | "sort" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | type MockDelegate struct { 12 | shutdown bool 13 | } 14 | 15 | func (m *MockDelegate) NewPredecessor(local, remoteNew, remotePrev *Vnode) { 16 | } 17 | func (m *MockDelegate) Leaving(local, pred, succ *Vnode) { 18 | } 19 | func (m *MockDelegate) PredecessorLeaving(local, remote *Vnode) { 20 | } 21 | func (m *MockDelegate) SuccessorLeaving(local, remote *Vnode) { 22 | } 23 | func (m *MockDelegate) Shutdown() { 24 | m.shutdown = true 25 | } 26 | 27 | func makeRing() *Ring { 28 | conf := &Config{ 29 | NumVnodes: 5, 30 | NumSuccessors: 8, 31 | HashFunc: sha1.New, 32 | hashBits: 160, 33 | StabilizeMin: time.Second, 34 | StabilizeMax: 5 * time.Second, 35 | } 36 | 37 | ring := &Ring{} 38 | ring.init(conf, nil) 39 | return ring 40 | } 41 | 42 | func TestRingInit(t *testing.T) { 43 | // Create a ring 44 | ring := &Ring{} 45 | conf := DefaultConfig("test") 46 | ring.init(conf, nil) 47 | 48 | // Test features 49 | if ring.config != conf { 50 | t.Fatalf("wrong config") 51 | } 52 | if ring.transport == nil { 53 | t.Fatalf("missing transport") 54 | } 55 | 56 | // Check the vnodes 57 | for i := 0; i < conf.NumVnodes; i++ { 58 | if ring.vnodes[i] == nil { 59 | t.Fatalf("missing vnode!") 60 | } 61 | if ring.vnodes[i].ring != ring { 62 | t.Fatalf("ring missing!") 63 | } 64 | if ring.vnodes[i].Id == nil { 65 | 
t.Fatalf("ID not initialized!") 66 | } 67 | } 68 | } 69 | 70 | func TestRingLen(t *testing.T) { 71 | ring := makeRing() 72 | if ring.Len() != 5 { 73 | t.Fatalf("wrong len") 74 | } 75 | } 76 | 77 | func TestRingSort(t *testing.T) { 78 | ring := makeRing() 79 | sort.Sort(ring) 80 | if bytes.Compare(ring.vnodes[0].Id, ring.vnodes[1].Id) != -1 { 81 | t.Fatalf("bad sort") 82 | } 83 | if bytes.Compare(ring.vnodes[1].Id, ring.vnodes[2].Id) != -1 { 84 | t.Fatalf("bad sort") 85 | } 86 | if bytes.Compare(ring.vnodes[2].Id, ring.vnodes[3].Id) != -1 { 87 | t.Fatalf("bad sort") 88 | } 89 | if bytes.Compare(ring.vnodes[3].Id, ring.vnodes[4].Id) != -1 { 90 | t.Fatalf("bad sort") 91 | } 92 | } 93 | 94 | func TestRingNearest(t *testing.T) { 95 | ring := makeRing() 96 | ring.vnodes[0].Id = []byte{2} 97 | ring.vnodes[1].Id = []byte{4} 98 | ring.vnodes[2].Id = []byte{7} 99 | ring.vnodes[3].Id = []byte{10} 100 | ring.vnodes[4].Id = []byte{14} 101 | key := []byte{6} 102 | 103 | near := ring.nearestVnode(key) 104 | if near != ring.vnodes[1] { 105 | t.Fatalf("got wrong node back!") 106 | } 107 | 108 | key = []byte{0} 109 | near = ring.nearestVnode(key) 110 | if near != ring.vnodes[4] { 111 | t.Fatalf("got wrong node back!") 112 | } 113 | } 114 | 115 | func TestRingSchedule(t *testing.T) { 116 | ring := makeRing() 117 | ring.setLocalSuccessors() 118 | ring.schedule() 119 | for i := 0; i < len(ring.vnodes); i++ { 120 | if ring.vnodes[i].timer == nil { 121 | t.Fatalf("expected timer!") 122 | } 123 | } 124 | ring.stopVnodes() 125 | } 126 | 127 | func TestRingSetLocalSucc(t *testing.T) { 128 | ring := makeRing() 129 | ring.setLocalSuccessors() 130 | for i := 0; i < len(ring.vnodes); i++ { 131 | for j := 0; j < 4; j++ { 132 | if ring.vnodes[i].successors[j] == nil { 133 | t.Fatalf("expected successor!") 134 | } 135 | } 136 | if ring.vnodes[i].successors[4] != nil { 137 | t.Fatalf("should not have 5th successor!") 138 | } 139 | } 140 | 141 | // Verify the successor manually for node 3 142 | vn 
:= ring.vnodes[2] 143 | if vn.successors[0] != &ring.vnodes[3].Vnode { 144 | t.Fatalf("bad succ!") 145 | } 146 | if vn.successors[1] != &ring.vnodes[4].Vnode { 147 | t.Fatalf("bad succ!") 148 | } 149 | if vn.successors[2] != &ring.vnodes[0].Vnode { 150 | t.Fatalf("bad succ!") 151 | } 152 | if vn.successors[3] != &ring.vnodes[1].Vnode { 153 | t.Fatalf("bad succ!") 154 | } 155 | } 156 | 157 | func TestRingDelegate(t *testing.T) { 158 | d := &MockDelegate{} 159 | ring := makeRing() 160 | ring.setLocalSuccessors() 161 | ring.config.Delegate = d 162 | ring.schedule() 163 | 164 | var b bool 165 | f := func() { 166 | println("run!") 167 | b = true 168 | } 169 | ch := ring.invokeDelegate(f) 170 | if ch == nil { 171 | t.Fatalf("expected chan") 172 | } 173 | select { 174 | case <-ch: 175 | case <-time.After(time.Second): 176 | t.Fatalf("timeout") 177 | } 178 | if !b { 179 | t.Fatalf("b should be true") 180 | } 181 | 182 | ring.stopDelegate() 183 | if !d.shutdown { 184 | t.Fatalf("delegate did not get shutdown") 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /transport.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // Wraps vnode and object 9 | type localRPC struct { 10 | vnode *Vnode 11 | obj VnodeRPC 12 | } 13 | 14 | // LocalTransport is used to provides fast routing to Vnodes running 15 | // locally using direct method calls. For any non-local vnodes, the 16 | // request is passed on to another transport. 
17 | type LocalTransport struct { 18 | host string 19 | remote Transport 20 | lock sync.RWMutex 21 | local map[string]*localRPC 22 | } 23 | 24 | // Creates a local transport to wrap a remote transport 25 | func InitLocalTransport(remote Transport) Transport { 26 | // Replace a nil transport with black hole 27 | if remote == nil { 28 | remote = &BlackholeTransport{} 29 | } 30 | 31 | local := make(map[string]*localRPC) 32 | return &LocalTransport{remote: remote, local: local} 33 | } 34 | 35 | // Checks for a local vnode 36 | func (lt *LocalTransport) get(vn *Vnode) (VnodeRPC, bool) { 37 | key := vn.String() 38 | lt.lock.RLock() 39 | defer lt.lock.RUnlock() 40 | w, ok := lt.local[key] 41 | if ok { 42 | return w.obj, ok 43 | } else { 44 | return nil, ok 45 | } 46 | } 47 | 48 | func (lt *LocalTransport) ListVnodes(host string) ([]*Vnode, error) { 49 | // Check if this is a local host 50 | if host == lt.host { 51 | // Generate all the local clients 52 | res := make([]*Vnode, 0, len(lt.local)) 53 | 54 | // Build list 55 | lt.lock.RLock() 56 | for _, v := range lt.local { 57 | res = append(res, v.vnode) 58 | } 59 | lt.lock.RUnlock() 60 | 61 | return res, nil 62 | } 63 | 64 | // Pass onto remote 65 | return lt.remote.ListVnodes(host) 66 | } 67 | 68 | func (lt *LocalTransport) Ping(vn *Vnode) (bool, error) { 69 | // Look for it locally 70 | _, ok := lt.get(vn) 71 | 72 | // If it exists locally, handle it 73 | if ok { 74 | return true, nil 75 | } 76 | 77 | // Pass onto remote 78 | return lt.remote.Ping(vn) 79 | } 80 | 81 | func (lt *LocalTransport) GetPredecessor(vn *Vnode) (*Vnode, error) { 82 | // Look for it locally 83 | obj, ok := lt.get(vn) 84 | 85 | // If it exists locally, handle it 86 | if ok { 87 | return obj.GetPredecessor() 88 | } 89 | 90 | // Pass onto remote 91 | return lt.remote.GetPredecessor(vn) 92 | } 93 | 94 | func (lt *LocalTransport) Notify(vn, self *Vnode) ([]*Vnode, error) { 95 | // Look for it locally 96 | obj, ok := lt.get(vn) 97 | 98 | // If it exists 
locally, handle it 99 | if ok { 100 | return obj.Notify(self) 101 | } 102 | 103 | // Pass onto remote 104 | return lt.remote.Notify(vn, self) 105 | } 106 | 107 | func (lt *LocalTransport) FindSuccessors(vn *Vnode, n int, key []byte) ([]*Vnode, error) { 108 | // Look for it locally 109 | obj, ok := lt.get(vn) 110 | 111 | // If it exists locally, handle it 112 | if ok { 113 | return obj.FindSuccessors(n, key) 114 | } 115 | 116 | // Pass onto remote 117 | return lt.remote.FindSuccessors(vn, n, key) 118 | } 119 | 120 | func (lt *LocalTransport) ClearPredecessor(target, self *Vnode) error { 121 | // Look for it locally 122 | obj, ok := lt.get(target) 123 | 124 | // If it exists locally, handle it 125 | if ok { 126 | return obj.ClearPredecessor(self) 127 | } 128 | 129 | // Pass onto remote 130 | return lt.remote.ClearPredecessor(target, self) 131 | } 132 | 133 | func (lt *LocalTransport) SkipSuccessor(target, self *Vnode) error { 134 | // Look for it locally 135 | obj, ok := lt.get(target) 136 | 137 | // If it exists locally, handle it 138 | if ok { 139 | return obj.SkipSuccessor(self) 140 | } 141 | 142 | // Pass onto remote 143 | return lt.remote.SkipSuccessor(target, self) 144 | } 145 | 146 | func (lt *LocalTransport) Register(v *Vnode, o VnodeRPC) { 147 | // Register local instance 148 | key := v.String() 149 | lt.lock.Lock() 150 | lt.host = v.Host 151 | lt.local[key] = &localRPC{v, o} 152 | lt.lock.Unlock() 153 | 154 | // Register with remote transport 155 | lt.remote.Register(v, o) 156 | } 157 | 158 | func (lt *LocalTransport) Deregister(v *Vnode) { 159 | key := v.String() 160 | lt.lock.Lock() 161 | delete(lt.local, key) 162 | lt.lock.Unlock() 163 | } 164 | 165 | // BlackholeTransport is used to provide an implemenation of the Transport that 166 | // does not actually do anything. Any operation will result in an error. 
167 | type BlackholeTransport struct { 168 | } 169 | 170 | func (*BlackholeTransport) ListVnodes(host string) ([]*Vnode, error) { 171 | return nil, fmt.Errorf("Failed to connect! Blackhole: %s.", host) 172 | } 173 | 174 | func (*BlackholeTransport) Ping(vn *Vnode) (bool, error) { 175 | return false, nil 176 | } 177 | 178 | func (*BlackholeTransport) GetPredecessor(vn *Vnode) (*Vnode, error) { 179 | return nil, fmt.Errorf("Failed to connect! Blackhole: %s.", vn.String()) 180 | } 181 | 182 | func (*BlackholeTransport) Notify(vn, self *Vnode) ([]*Vnode, error) { 183 | return nil, fmt.Errorf("Failed to connect! Blackhole: %s", vn.String()) 184 | } 185 | 186 | func (*BlackholeTransport) FindSuccessors(vn *Vnode, n int, key []byte) ([]*Vnode, error) { 187 | return nil, fmt.Errorf("Failed to connect! Blackhole: %s", vn.String()) 188 | } 189 | 190 | func (*BlackholeTransport) ClearPredecessor(target, self *Vnode) error { 191 | return fmt.Errorf("Failed to connect! Blackhole: %s", target.String()) 192 | } 193 | 194 | func (*BlackholeTransport) SkipSuccessor(target, self *Vnode) error { 195 | return fmt.Errorf("Failed to connect! 
Blackhole: %s", target.String()) 196 | } 197 | 198 | func (*BlackholeTransport) Register(v *Vnode, o VnodeRPC) { 199 | } 200 | -------------------------------------------------------------------------------- /transport_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | type MockVnodeRPC struct { 9 | err error 10 | pred *Vnode 11 | not_pred *Vnode 12 | succ_list []*Vnode 13 | key []byte 14 | succ []*Vnode 15 | skip *Vnode 16 | } 17 | 18 | func (mv *MockVnodeRPC) GetPredecessor() (*Vnode, error) { 19 | return mv.pred, mv.err 20 | } 21 | func (mv *MockVnodeRPC) Notify(vn *Vnode) ([]*Vnode, error) { 22 | mv.not_pred = vn 23 | return mv.succ_list, mv.err 24 | } 25 | func (mv *MockVnodeRPC) FindSuccessors(n int, key []byte) ([]*Vnode, error) { 26 | mv.key = key 27 | return mv.succ, mv.err 28 | } 29 | 30 | func (mv *MockVnodeRPC) ClearPredecessor(p *Vnode) error { 31 | mv.pred = nil 32 | return nil 33 | } 34 | 35 | func (mv *MockVnodeRPC) SkipSuccessor(s *Vnode) error { 36 | mv.skip = s 37 | return nil 38 | } 39 | 40 | func makeLocal() *LocalTransport { 41 | return InitLocalTransport(nil).(*LocalTransport) 42 | } 43 | 44 | func TestInitLocalTransport(t *testing.T) { 45 | local := InitLocalTransport(nil).(*LocalTransport) 46 | if local.remote == nil { 47 | t.Fatalf("bad remote") 48 | } 49 | if local.local == nil { 50 | t.Fatalf("missing map") 51 | } 52 | } 53 | 54 | func TestLocalList(t *testing.T) { 55 | l := makeLocal() 56 | vn := &Vnode{Id: []byte{1}, Host: "test"} 57 | mockVN := &MockVnodeRPC{} 58 | l.Register(vn, mockVN) 59 | 60 | list, err := l.ListVnodes("test") 61 | if err != nil { 62 | t.Fatalf("unexpected err. 
%s", err) 63 | } 64 | if len(list) != 1 || list[0] != vn { 65 | t.Fatalf("local list failed", list) 66 | } 67 | } 68 | 69 | func TestLocalListRemote(t *testing.T) { 70 | l := makeLocal() 71 | vn := &Vnode{Id: []byte{1}, Host: "test"} 72 | mockVN := &MockVnodeRPC{} 73 | l.Register(vn, mockVN) 74 | 75 | _, err := l.ListVnodes("remote") 76 | if err == nil { 77 | t.Fatalf("expected err!") 78 | } 79 | } 80 | 81 | func TestLocalPing(t *testing.T) { 82 | l := makeLocal() 83 | vn := &Vnode{Id: []byte{1}} 84 | mockVN := &MockVnodeRPC{} 85 | l.Register(vn, mockVN) 86 | if res, err := l.Ping(vn); !res || err != nil { 87 | t.Fatalf("local ping failed") 88 | } 89 | } 90 | 91 | func TestLocalMissingPing(t *testing.T) { 92 | l := makeLocal() 93 | vn := &Vnode{Id: []byte{2}} 94 | mockVN := &MockVnodeRPC{} 95 | l.Register(vn, mockVN) 96 | 97 | // Print some random node 98 | vn2 := &Vnode{Id: []byte{3}} 99 | if res, _ := l.Ping(vn2); res { 100 | t.Fatalf("ping succeeded") 101 | } 102 | } 103 | 104 | func TestLocalGetPredecessor(t *testing.T) { 105 | l := makeLocal() 106 | pred := &Vnode{Id: []byte{10}} 107 | vn := &Vnode{Id: []byte{42}} 108 | mockVN := &MockVnodeRPC{pred: pred, err: nil} 109 | l.Register(vn, mockVN) 110 | 111 | vn2 := &Vnode{Id: []byte{42}} 112 | res, err := l.GetPredecessor(vn2) 113 | if err != nil { 114 | t.Fatalf("local GetPredecessor failed") 115 | } 116 | if res != pred { 117 | t.Fatalf("got wrong predecessor") 118 | } 119 | 120 | unknown := &Vnode{Id: []byte{1}} 121 | res, err = l.GetPredecessor(unknown) 122 | if err == nil { 123 | t.Fatalf("expected error!") 124 | } 125 | } 126 | 127 | func TestLocalNotify(t *testing.T) { 128 | l := makeLocal() 129 | suc1 := &Vnode{Id: []byte{10}} 130 | suc2 := &Vnode{Id: []byte{20}} 131 | suc3 := &Vnode{Id: []byte{30}} 132 | succ_list := []*Vnode{suc1, suc2, suc3} 133 | 134 | mockVN := &MockVnodeRPC{succ_list: succ_list, err: nil} 135 | vn := &Vnode{Id: []byte{0}} 136 | l.Register(vn, mockVN) 137 | 138 | self := &Vnode{Id: 
[]byte{60}} 139 | res, err := l.Notify(vn, self) 140 | if err != nil { 141 | t.Fatalf("local notify failed") 142 | } 143 | if res == nil || res[0] != suc1 || res[1] != suc2 || res[2] != suc3 { 144 | t.Fatalf("got wrong successor list") 145 | } 146 | if mockVN.not_pred != self { 147 | t.Fatalf("didn't get notified correctly!") 148 | } 149 | 150 | unknown := &Vnode{Id: []byte{1}} 151 | res, err = l.Notify(unknown, self) 152 | if err == nil { 153 | t.Fatalf("remote notify should fail") 154 | } 155 | } 156 | 157 | func TestLocalFindSucc(t *testing.T) { 158 | l := makeLocal() 159 | suc := []*Vnode{&Vnode{Id: []byte{40}}} 160 | 161 | mockVN := &MockVnodeRPC{succ: suc, err: nil} 162 | vn := &Vnode{Id: []byte{12}} 163 | l.Register(vn, mockVN) 164 | 165 | key := []byte("test") 166 | res, err := l.FindSuccessors(vn, 1, key) 167 | if err != nil { 168 | t.Fatalf("local FindSuccessor failed") 169 | } 170 | if res[0] != suc[0] { 171 | t.Fatalf("got wrong successor") 172 | } 173 | if bytes.Compare(mockVN.key, key) != 0 { 174 | t.Fatalf("didn't get key correctly!") 175 | } 176 | 177 | unknown := &Vnode{Id: []byte{1}} 178 | res, err = l.FindSuccessors(unknown, 1, key) 179 | if err == nil { 180 | t.Fatalf("remote find should fail") 181 | } 182 | } 183 | 184 | func TestLocalClearPred(t *testing.T) { 185 | l := makeLocal() 186 | pred := &Vnode{Id: []byte{10}} 187 | mockVN := &MockVnodeRPC{pred: pred} 188 | vn := &Vnode{Id: []byte{12}} 189 | l.Register(vn, mockVN) 190 | 191 | err := l.ClearPredecessor(vn, pred) 192 | if err != nil { 193 | t.Fatalf("local ClearPredecessor failed") 194 | } 195 | if mockVN.pred != nil { 196 | t.Fatalf("clear failed") 197 | } 198 | 199 | unknown := &Vnode{Id: []byte{1}} 200 | err = l.ClearPredecessor(unknown, pred) 201 | if err == nil { 202 | t.Fatalf("remote clear should fail") 203 | } 204 | } 205 | 206 | func TestLocalSkipSucc(t *testing.T) { 207 | l := makeLocal() 208 | suc := []*Vnode{&Vnode{Id: []byte{40}}} 209 | mockVN := &MockVnodeRPC{succ: suc} 210 
| vn := &Vnode{Id: []byte{12}} 211 | l.Register(vn, mockVN) 212 | 213 | s := &Vnode{Id: []byte{40}} 214 | err := l.SkipSuccessor(vn, s) 215 | if err != nil { 216 | t.Fatalf("local Skip failed") 217 | } 218 | if mockVN.skip != s { 219 | t.Fatalf("skip failed") 220 | } 221 | 222 | unknown := &Vnode{Id: []byte{1}} 223 | err = l.SkipSuccessor(unknown, s) 224 | if err == nil { 225 | t.Fatalf("remote skip should fail") 226 | } 227 | } 228 | 229 | func TestLocalDeregister(t *testing.T) { 230 | l := makeLocal() 231 | vn := &Vnode{Id: []byte{1}} 232 | mockVN := &MockVnodeRPC{} 233 | l.Register(vn, mockVN) 234 | if res, err := l.Ping(vn); !res || err != nil { 235 | t.Fatalf("local ping failed") 236 | } 237 | l.Deregister(vn) 238 | if res, _ := l.Ping(vn); res { 239 | t.Fatalf("local ping succeeded") 240 | } 241 | } 242 | 243 | func TestBHList(t *testing.T) { 244 | bh := BlackholeTransport{} 245 | res, err := bh.ListVnodes("test") 246 | if res != nil || err == nil { 247 | t.Fatalf("expected fail") 248 | } 249 | } 250 | 251 | func TestBHPing(t *testing.T) { 252 | bh := BlackholeTransport{} 253 | vn := &Vnode{Id: []byte{12}} 254 | res, err := bh.Ping(vn) 255 | if res || err != nil { 256 | t.Fatalf("expected fail") 257 | } 258 | } 259 | 260 | func TestBHGetPred(t *testing.T) { 261 | bh := BlackholeTransport{} 262 | vn := &Vnode{Id: []byte{12}} 263 | _, err := bh.GetPredecessor(vn) 264 | if err.Error()[:18] != "Failed to connect!" { 265 | t.Fatalf("expected fail") 266 | } 267 | } 268 | 269 | func TestBHNotify(t *testing.T) { 270 | bh := BlackholeTransport{} 271 | vn := &Vnode{Id: []byte{12}} 272 | vn2 := &Vnode{Id: []byte{42}} 273 | _, err := bh.Notify(vn, vn2) 274 | if err.Error()[:18] != "Failed to connect!" 
// between reports whether key lies strictly inside the interval (id1, id2),
// excluding both endpoints. Ordering is lexicographic via bytes.Compare.
// When id1 sorts after id2 the interval is taken to wrap around the ring, so
// a key qualifies if it is past id1 or before id2.
func between(id1, id2, key []byte) bool {
	afterStart := bytes.Compare(key, id1) > 0
	beforeEnd := bytes.Compare(key, id2) < 0

	// Interval wraps around the end of the ring
	if bytes.Compare(id1, id2) > 0 {
		return afterStart || beforeEnd
	}

	// Plain, non-wrapping interval
	return afterStart && beforeEnd
}
// powerOffset computes (id + 2^exp) mod 2^mod, interpreting id as a
// big-endian unsigned integer.
//
// The result is left-padded with zero bytes to len(id). big.Int.Bytes drops
// leading zero bytes, so returning it directly yields a short slice whenever
// the high byte(s) of the result are zero; such a slice does not order
// correctly under bytes.Compare against full-width IDs. If the result needs
// more than len(id) bytes (mod larger than the ID width) it is returned
// unpadded, as before.
func powerOffset(id []byte, exp int, mod int) []byte {
	two := big.NewInt(2)

	// 2^exp
	offset := new(big.Int).Exp(two, big.NewInt(int64(exp)), nil)

	// id + 2^exp
	sum := new(big.Int).SetBytes(id)
	sum.Add(sum, offset)

	// Reduce modulo 2^mod
	ceil := new(big.Int).Exp(two, big.NewInt(int64(mod)), nil)
	sum.Mod(sum, ceil)

	raw := sum.Bytes()
	if len(raw) >= len(id) {
		return raw
	}

	// Restore the leading zero bytes stripped by Bytes()
	out := make([]byte, len(id))
	copy(out[len(id)-len(raw):], raw)
	return out
}
3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestRandStabilize(t *testing.T) { 10 | min := time.Duration(10 * time.Second) 11 | max := time.Duration(30 * time.Second) 12 | conf := &Config{ 13 | StabilizeMin: min, 14 | StabilizeMax: max} 15 | 16 | var times []time.Duration 17 | for i := 0; i < 1000; i++ { 18 | after := randStabilize(conf) 19 | times = append(times, after) 20 | if after < min { 21 | t.Fatalf("after below min") 22 | } 23 | if after > max { 24 | t.Fatalf("after above max") 25 | } 26 | } 27 | 28 | collisions := 0 29 | for idx, val := range times { 30 | for i := 0; i < len(times); i++ { 31 | if idx != i && times[i] == val { 32 | collisions += 1 33 | } 34 | } 35 | } 36 | 37 | if collisions > 3 { 38 | t.Fatalf("too many collisions! %d", collisions) 39 | } 40 | } 41 | 42 | func TestBetween(t *testing.T) { 43 | t1 := []byte{0, 0, 0, 0} 44 | t2 := []byte{1, 0, 0, 0} 45 | k := []byte{0, 0, 5, 0} 46 | if !between(t1, t2, k) { 47 | t.Fatalf("expected k between!") 48 | } 49 | if between(t1, t2, t1) { 50 | t.Fatalf("dont expect t1 between!") 51 | } 52 | if between(t1, t2, t2) { 53 | t.Fatalf("dont expect t1 between!") 54 | } 55 | 56 | k = []byte{2, 0, 0, 0} 57 | if between(t1, t2, k) { 58 | t.Fatalf("dont expect k between!") 59 | } 60 | } 61 | 62 | func TestBetweenWrap(t *testing.T) { 63 | t1 := []byte{0xff, 0, 0, 0} 64 | t2 := []byte{1, 0, 0, 0} 65 | k := []byte{0, 0, 5, 0} 66 | if !between(t1, t2, k) { 67 | t.Fatalf("expected k between!") 68 | } 69 | 70 | k = []byte{0xff, 0xff, 0, 0} 71 | if !between(t1, t2, k) { 72 | t.Fatalf("expect k between!") 73 | } 74 | } 75 | 76 | func TestBetweenRightIncl(t *testing.T) { 77 | t1 := []byte{0, 0, 0, 0} 78 | t2 := []byte{1, 0, 0, 0} 79 | k := []byte{1, 0, 0, 0} 80 | if !betweenRightIncl(t1, t2, k) { 81 | t.Fatalf("expected k between!") 82 | } 83 | } 84 | 85 | func TestBetweenRightInclWrap(t *testing.T) { 86 | t1 := []byte{0xff, 0, 0, 0} 87 | t2 := []byte{1, 0, 0, 0} 88 | k := []byte{1, 0, 0, 0} 89 | 
if !betweenRightIncl(t1, t2, k) { 90 | t.Fatalf("expected k between!") 91 | } 92 | } 93 | 94 | func TestPowerOffset(t *testing.T) { 95 | id := []byte{0, 0, 0, 0} 96 | exp := 30 97 | mod := 32 98 | val := powerOffset(id, exp, mod) 99 | if val[0] != 64 { 100 | t.Fatalf("unexpected val! %v", val) 101 | } 102 | 103 | // 0-7, 8-15, 16-23, 24-31 104 | id = []byte{0, 0xff, 0xff, 0xff} 105 | exp = 23 106 | val = powerOffset(id, exp, mod) 107 | if val[0] != 1 || val[1] != 0x7f || val[2] != 0xff || val[3] != 0xff { 108 | t.Fatalf("unexpected val! %v", val) 109 | } 110 | } 111 | 112 | func TestMax(t *testing.T) { 113 | if max(-10, 10) != 10 { 114 | t.Fatalf("bad max") 115 | } 116 | if max(10, -10) != 10 { 117 | t.Fatalf("bad max") 118 | } 119 | } 120 | 121 | func TestMin(t *testing.T) { 122 | if min(-10, 10) != -10 { 123 | t.Fatalf("bad min") 124 | } 125 | if min(10, -10) != -10 { 126 | t.Fatalf("bad min") 127 | } 128 | } 129 | 130 | func TestNearestVnodesKey(t *testing.T) { 131 | vnodes := make([]*Vnode, 5) 132 | vnodes[0] = &Vnode{Id: []byte{2}} 133 | vnodes[1] = &Vnode{Id: []byte{4}} 134 | vnodes[2] = &Vnode{Id: []byte{7}} 135 | vnodes[3] = &Vnode{Id: []byte{10}} 136 | vnodes[4] = &Vnode{Id: []byte{14}} 137 | key := []byte{6} 138 | 139 | near := nearestVnodeToKey(vnodes, key) 140 | if near != vnodes[1] { 141 | t.Fatalf("got wrong node back!") 142 | } 143 | 144 | key = []byte{0} 145 | near = nearestVnodeToKey(vnodes, key) 146 | if near != vnodes[4] { 147 | t.Fatalf("got wrong node back!") 148 | } 149 | } 150 | 151 | func TestMergeErrors(t *testing.T) { 152 | e1 := errors.New("test1") 153 | e2 := errors.New("test2") 154 | 155 | if mergeErrors(e1, nil) != e1 { 156 | t.Fatalf("bad merge") 157 | } 158 | if mergeErrors(nil, e1) != e1 { 159 | t.Fatalf("bad merge") 160 | } 161 | if mergeErrors(nil, nil) != nil { 162 | t.Fatalf("bad merge") 163 | } 164 | if mergeErrors(e1, e2).Error() != "test1\ntest2" { 165 | t.Fatalf("bad merge") 166 | } 167 | } 168 | 
-------------------------------------------------------------------------------- /vnode.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "log" 7 | "time" 8 | ) 9 | 10 | // Converts the ID to string 11 | func (vn *Vnode) String() string { 12 | return fmt.Sprintf("%x", vn.Id) 13 | } 14 | 15 | // Initializes a local vnode 16 | func (vn *localVnode) init(idx int) { 17 | // Generate an ID 18 | vn.genId(uint16(idx)) 19 | 20 | // Set our host 21 | vn.Host = vn.ring.config.Hostname 22 | 23 | // Initialize all state 24 | vn.successors = make([]*Vnode, vn.ring.config.NumSuccessors) 25 | vn.finger = make([]*Vnode, vn.ring.config.hashBits) 26 | 27 | // Register with the RPC mechanism 28 | vn.ring.transport.Register(&vn.Vnode, vn) 29 | } 30 | 31 | // Schedules the Vnode to do regular maintenence 32 | func (vn *localVnode) schedule() { 33 | // Setup our stabilize timer 34 | vn.timer = time.AfterFunc(randStabilize(vn.ring.config), vn.stabilize) 35 | } 36 | 37 | // Generates an ID for the node 38 | func (vn *localVnode) genId(idx uint16) { 39 | // Use the hash funciton 40 | conf := vn.ring.config 41 | hash := conf.HashFunc() 42 | hash.Write([]byte(conf.Hostname)) 43 | binary.Write(hash, binary.BigEndian, idx) 44 | 45 | // Use the hash as the ID 46 | vn.Id = hash.Sum(nil) 47 | } 48 | 49 | // Called to periodically stabilize the vnode 50 | func (vn *localVnode) stabilize() { 51 | // Clear the timer 52 | vn.timer = nil 53 | 54 | // Check for shutdown 55 | if vn.ring.shutdown != nil { 56 | vn.ring.shutdown <- true 57 | return 58 | } 59 | 60 | // Setup the next stabilize timer 61 | defer vn.schedule() 62 | 63 | // Check for new successor 64 | if err := vn.checkNewSuccessor(); err != nil { 65 | log.Printf("[ERR] Error checking for new successor: %s", err) 66 | } 67 | 68 | // Notify the successor 69 | if err := vn.notifySuccessor(); err != nil { 70 | log.Printf("[ERR] Error 
notifying successor: %s", err) 71 | } 72 | 73 | // Finger table fix up 74 | if err := vn.fixFingerTable(); err != nil { 75 | log.Printf("[ERR] Error fixing finger table: %s", err) 76 | } 77 | 78 | // Check the predecessor 79 | if err := vn.checkPredecessor(); err != nil { 80 | log.Printf("[ERR] Error checking predecessor: %s", err) 81 | } 82 | 83 | // Set the last stabilized time 84 | vn.stabilized = time.Now() 85 | } 86 | 87 | // Checks for a new successor 88 | func (vn *localVnode) checkNewSuccessor() error { 89 | // Ask our successor for it's predecessor 90 | trans := vn.ring.transport 91 | 92 | CHECK_NEW_SUC: 93 | succ := vn.successors[0] 94 | if succ == nil { 95 | panic("Node has no successor!") 96 | } 97 | maybe_suc, err := trans.GetPredecessor(succ) 98 | if err != nil { 99 | // Check if we have succ list, try to contact next live succ 100 | known := vn.knownSuccessors() 101 | if known > 1 { 102 | for i := 0; i < known; i++ { 103 | if alive, _ := trans.Ping(vn.successors[0]); !alive { 104 | // Don't eliminate the last successor we know of 105 | if i+1 == known { 106 | return fmt.Errorf("All known successors dead!") 107 | } 108 | 109 | // Advance the successors list past the dead one 110 | copy(vn.successors[0:], vn.successors[1:]) 111 | vn.successors[known-1-i] = nil 112 | } else { 113 | // Found live successor, check for new one 114 | goto CHECK_NEW_SUC 115 | } 116 | } 117 | } 118 | return err 119 | } 120 | 121 | // Check if we should replace our successor 122 | if maybe_suc != nil && between(vn.Id, succ.Id, maybe_suc.Id) { 123 | // Check if new successor is alive before switching 124 | alive, err := trans.Ping(maybe_suc) 125 | if alive && err == nil { 126 | copy(vn.successors[1:], vn.successors[0:len(vn.successors)-1]) 127 | vn.successors[0] = maybe_suc 128 | } else { 129 | return err 130 | } 131 | } 132 | return nil 133 | } 134 | 135 | // RPC: Invoked to return out predecessor 136 | func (vn *localVnode) GetPredecessor() (*Vnode, error) { 137 | return 
vn.predecessor, nil 138 | } 139 | 140 | // Notifies our successor of us, updates successor list 141 | func (vn *localVnode) notifySuccessor() error { 142 | // Notify successor 143 | succ := vn.successors[0] 144 | succ_list, err := vn.ring.transport.Notify(succ, &vn.Vnode) 145 | if err != nil { 146 | return err 147 | } 148 | 149 | // Trim the successors list if too long 150 | max_succ := vn.ring.config.NumSuccessors 151 | if len(succ_list) > max_succ-1 { 152 | succ_list = succ_list[:max_succ-1] 153 | } 154 | 155 | // Update local successors list 156 | for idx, s := range succ_list { 157 | if s == nil { 158 | break 159 | } 160 | // Ensure we don't set ourselves as a successor! 161 | if s == nil || s.String() == vn.String() { 162 | break 163 | } 164 | vn.successors[idx+1] = s 165 | } 166 | return nil 167 | } 168 | 169 | // RPC: Notify is invoked when a Vnode gets notified 170 | func (vn *localVnode) Notify(maybe_pred *Vnode) ([]*Vnode, error) { 171 | // Check if we should update our predecessor 172 | if vn.predecessor == nil || between(vn.predecessor.Id, vn.Id, maybe_pred.Id) { 173 | // Inform the delegate 174 | conf := vn.ring.config 175 | old := vn.predecessor 176 | vn.ring.invokeDelegate(func() { 177 | conf.Delegate.NewPredecessor(&vn.Vnode, maybe_pred, old) 178 | }) 179 | 180 | vn.predecessor = maybe_pred 181 | } 182 | 183 | // Return our successors list 184 | return vn.successors, nil 185 | } 186 | 187 | // Fixes up the finger table 188 | func (vn *localVnode) fixFingerTable() error { 189 | // Determine the offset 190 | hb := vn.ring.config.hashBits 191 | offset := powerOffset(vn.Id, vn.last_finger, hb) 192 | 193 | // Find the successor 194 | nodes, err := vn.FindSuccessors(1, offset) 195 | if nodes == nil || len(nodes) == 0 || err != nil { 196 | return err 197 | } 198 | node := nodes[0] 199 | 200 | // Update the finger table 201 | vn.finger[vn.last_finger] = node 202 | 203 | // Try to skip as many finger entries as possible 204 | for { 205 | next := 
vn.last_finger + 1 206 | if next >= hb { 207 | break 208 | } 209 | offset := powerOffset(vn.Id, next, hb) 210 | 211 | // While the node is the successor, update the finger entries 212 | if betweenRightIncl(vn.Id, node.Id, offset) { 213 | vn.finger[next] = node 214 | vn.last_finger = next 215 | } else { 216 | break 217 | } 218 | } 219 | 220 | // Increment to the index to repair 221 | if vn.last_finger+1 == hb { 222 | vn.last_finger = 0 223 | } else { 224 | vn.last_finger++ 225 | } 226 | 227 | return nil 228 | } 229 | 230 | // Checks the health of our predecessor 231 | func (vn *localVnode) checkPredecessor() error { 232 | // Check predecessor 233 | if vn.predecessor != nil { 234 | res, err := vn.ring.transport.Ping(vn.predecessor) 235 | if err != nil { 236 | return err 237 | } 238 | 239 | // Predecessor is dead 240 | if !res { 241 | vn.predecessor = nil 242 | } 243 | } 244 | return nil 245 | } 246 | 247 | // Finds next N successors. N must be <= NumSuccessors 248 | func (vn *localVnode) FindSuccessors(n int, key []byte) ([]*Vnode, error) { 249 | // Check if we are the immediate predecessor 250 | if betweenRightIncl(vn.Id, vn.successors[0].Id, key) { 251 | return vn.successors[:n], nil 252 | } 253 | 254 | // Try the closest preceeding nodes 255 | cp := closestPreceedingVnodeIterator{} 256 | cp.init(vn, key) 257 | for { 258 | // Get the next closest node 259 | closest := cp.Next() 260 | if closest == nil { 261 | break 262 | } 263 | 264 | // Try that node, break on success 265 | res, err := vn.ring.transport.FindSuccessors(closest, n, key) 266 | if err == nil { 267 | return res, nil 268 | } else { 269 | log.Printf("[ERR] Failed to contact %s. 
Got %s", closest.String(), err) 270 | } 271 | } 272 | 273 | // Determine how many successors we know of 274 | successors := vn.knownSuccessors() 275 | 276 | // Check if the ID is between us and any non-immediate successors 277 | for i := 1; i <= successors-n; i++ { 278 | if betweenRightIncl(vn.Id, vn.successors[i].Id, key) { 279 | remain := vn.successors[i:] 280 | if len(remain) > n { 281 | remain = remain[:n] 282 | } 283 | return remain, nil 284 | } 285 | } 286 | 287 | // Checked all closer nodes and our successors! 288 | return nil, fmt.Errorf("Exhausted all preceeding nodes!") 289 | } 290 | 291 | // Instructs the vnode to leave 292 | func (vn *localVnode) leave() error { 293 | // Inform the delegate we are leaving 294 | conf := vn.ring.config 295 | pred := vn.predecessor 296 | succ := vn.successors[0] 297 | vn.ring.invokeDelegate(func() { 298 | conf.Delegate.Leaving(&vn.Vnode, pred, succ) 299 | }) 300 | 301 | // Notify predecessor to advance to their next successor 302 | var err error 303 | trans := vn.ring.transport 304 | if vn.predecessor != nil { 305 | err = trans.SkipSuccessor(vn.predecessor, &vn.Vnode) 306 | } 307 | 308 | // Notify successor to clear old predecessor 309 | err = mergeErrors(err, trans.ClearPredecessor(vn.successors[0], &vn.Vnode)) 310 | return err 311 | } 312 | 313 | // Used to clear our predecessor when a node is leaving 314 | func (vn *localVnode) ClearPredecessor(p *Vnode) error { 315 | if vn.predecessor != nil && vn.predecessor.String() == p.String() { 316 | // Inform the delegate 317 | conf := vn.ring.config 318 | old := vn.predecessor 319 | vn.ring.invokeDelegate(func() { 320 | conf.Delegate.PredecessorLeaving(&vn.Vnode, old) 321 | }) 322 | vn.predecessor = nil 323 | } 324 | return nil 325 | } 326 | 327 | // Used to skip a successor when a node is leaving 328 | func (vn *localVnode) SkipSuccessor(s *Vnode) error { 329 | // Skip if we have a match 330 | if vn.successors[0].String() == s.String() { 331 | // Inform the delegate 332 | conf 
:= vn.ring.config 333 | old := vn.successors[0] 334 | vn.ring.invokeDelegate(func() { 335 | conf.Delegate.SuccessorLeaving(&vn.Vnode, old) 336 | }) 337 | 338 | known := vn.knownSuccessors() 339 | copy(vn.successors[0:], vn.successors[1:]) 340 | vn.successors[known-1] = nil 341 | } 342 | return nil 343 | } 344 | 345 | // Determine how many successors we know of 346 | func (vn *localVnode) knownSuccessors() (successors int) { 347 | for i := 0; i < len(vn.successors); i++ { 348 | if vn.successors[i] != nil { 349 | successors = i + 1 350 | } 351 | } 352 | return 353 | } 354 | -------------------------------------------------------------------------------- /vnode_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha1" 6 | "sort" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func makeVnode() *localVnode { 12 | min := time.Duration(10 * time.Second) 13 | max := time.Duration(30 * time.Second) 14 | conf := &Config{ 15 | NumSuccessors: 8, 16 | StabilizeMin: min, 17 | StabilizeMax: max, 18 | HashFunc: sha1.New} 19 | trans := InitLocalTransport(nil) 20 | ring := &Ring{config: conf, transport: trans} 21 | return &localVnode{ring: ring} 22 | } 23 | 24 | func TestVnodeInit(t *testing.T) { 25 | vn := makeVnode() 26 | vn.init(0) 27 | if vn.Id == nil { 28 | t.Fatalf("unexpected nil") 29 | } 30 | if vn.successors == nil { 31 | t.Fatalf("unexpected nil") 32 | } 33 | if vn.finger == nil { 34 | t.Fatalf("unexpected nil") 35 | } 36 | if vn.timer != nil { 37 | t.Fatalf("unexpected timer") 38 | } 39 | } 40 | 41 | func TestVnodeSchedule(t *testing.T) { 42 | vn := makeVnode() 43 | vn.schedule() 44 | if vn.timer == nil { 45 | t.Fatalf("unexpected nil") 46 | } 47 | } 48 | 49 | func TestGenId(t *testing.T) { 50 | vn := makeVnode() 51 | var ids [][]byte 52 | for i := 0; i < 16; i++ { 53 | vn.genId(uint16(i)) 54 | ids = append(ids, vn.Id) 55 | } 56 | 57 | for idx, val := range ids { 58 | for i := 0; i < 
len(ids); i++ { 59 | if idx != i && bytes.Compare(ids[i], val) == 0 { 60 | t.Fatalf("unexpected id collision!") 61 | } 62 | } 63 | } 64 | } 65 | 66 | func TestVnodeStabilizeShutdown(t *testing.T) { 67 | vn := makeVnode() 68 | vn.schedule() 69 | vn.ring.shutdown = make(chan bool, 1) 70 | vn.stabilize() 71 | 72 | if vn.timer != nil { 73 | t.Fatalf("unexpected timer") 74 | } 75 | if !vn.stabilized.IsZero() { 76 | t.Fatalf("unexpected time") 77 | } 78 | select { 79 | case <-vn.ring.shutdown: 80 | return 81 | default: 82 | t.Fatalf("expected message") 83 | } 84 | } 85 | 86 | func TestVnodeStabilizeResched(t *testing.T) { 87 | vn := makeVnode() 88 | vn.init(1) 89 | vn.successors[0] = &vn.Vnode 90 | vn.schedule() 91 | vn.stabilize() 92 | 93 | if vn.timer == nil { 94 | t.Fatalf("expected timer") 95 | } 96 | if vn.stabilized.IsZero() { 97 | t.Fatalf("expected time") 98 | } 99 | vn.timer.Stop() 100 | } 101 | 102 | func TestVnodeKnownSucc(t *testing.T) { 103 | vn := makeVnode() 104 | vn.init(0) 105 | if vn.knownSuccessors() != 0 { 106 | t.Fatalf("wrong num known!") 107 | } 108 | vn.successors[0] = &Vnode{Id: []byte{1}} 109 | if vn.knownSuccessors() != 1 { 110 | t.Fatalf("wrong num known!") 111 | } 112 | } 113 | 114 | // Checks panic if no successors 115 | func TestVnodeCheckNewSuccAlivePanic(t *testing.T) { 116 | defer func() { 117 | if r := recover(); r == nil { 118 | t.Fatalf("expected panic!") 119 | } 120 | }() 121 | vn1 := makeVnode() 122 | vn1.init(1) 123 | vn1.checkNewSuccessor() 124 | } 125 | 126 | // Checks pinging a live successor with no changes 127 | func TestVnodeCheckNewSuccAlive(t *testing.T) { 128 | vn1 := makeVnode() 129 | vn1.init(1) 130 | 131 | vn2 := makeVnode() 132 | vn2.ring = vn1.ring 133 | vn2.init(2) 134 | vn2.predecessor = &vn1.Vnode 135 | vn1.successors[0] = &vn2.Vnode 136 | 137 | if pred, _ := vn2.GetPredecessor(); pred != &vn1.Vnode { 138 | t.Fatalf("expected vn1 as predecessor") 139 | } 140 | 141 | if err := vn1.checkNewSuccessor(); err != nil { 
142 | t.Fatalf("unexpected err %s", err) 143 | } 144 | 145 | if vn1.successors[0] != &vn2.Vnode { 146 | t.Fatalf("unexpected successor!") 147 | } 148 | } 149 | 150 | // Checks pinging a dead successor with no alternates 151 | func TestVnodeCheckNewSuccDead(t *testing.T) { 152 | vn1 := makeVnode() 153 | vn1.init(1) 154 | vn1.successors[0] = &Vnode{Id: []byte{0}} 155 | 156 | if err := vn1.checkNewSuccessor(); err == nil { 157 | t.Fatalf("err!", err) 158 | } 159 | 160 | if vn1.successors[0].String() != "00" { 161 | t.Fatalf("unexpected successor!") 162 | } 163 | } 164 | 165 | // Checks pinging a dead successor with alternate 166 | func TestVnodeCheckNewSuccDeadAlternate(t *testing.T) { 167 | r := makeRing() 168 | sort.Sort(r) 169 | 170 | vn1 := r.vnodes[0] 171 | vn2 := r.vnodes[1] 172 | vn3 := r.vnodes[2] 173 | 174 | vn1.successors[0] = &vn2.Vnode 175 | vn1.successors[1] = &vn3.Vnode 176 | vn2.predecessor = &vn1.Vnode 177 | vn3.predecessor = &vn2.Vnode 178 | 179 | // Remove vn2 180 | (r.transport.(*LocalTransport)).Deregister(&vn2.Vnode) 181 | 182 | // Should not get an error 183 | if err := vn1.checkNewSuccessor(); err != nil { 184 | t.Fatalf("unexpected err %s", err) 185 | } 186 | 187 | // Should become vn3 188 | if vn1.successors[0] != &vn3.Vnode { 189 | t.Fatalf("unexpected successor!") 190 | } 191 | } 192 | 193 | // Checks pinging a dead successor with all dead alternates 194 | func TestVnodeCheckNewSuccAllDeadAlternates(t *testing.T) { 195 | r := makeRing() 196 | sort.Sort(r) 197 | 198 | vn1 := r.vnodes[0] 199 | vn2 := r.vnodes[1] 200 | vn3 := r.vnodes[2] 201 | 202 | vn1.successors[0] = &vn2.Vnode 203 | vn1.successors[1] = &vn3.Vnode 204 | vn2.predecessor = &vn1.Vnode 205 | vn3.predecessor = &vn2.Vnode 206 | 207 | // Remove vn2 208 | (r.transport.(*LocalTransport)).Deregister(&vn2.Vnode) 209 | (r.transport.(*LocalTransport)).Deregister(&vn3.Vnode) 210 | 211 | // Should get an error 212 | if err := vn1.checkNewSuccessor(); err.Error() != "All known successors 
dead!" { 213 | t.Fatalf("unexpected err %s", err) 214 | } 215 | 216 | // Should just be vn3 217 | if vn1.successors[0] != &vn3.Vnode { 218 | t.Fatalf("unexpected successor!") 219 | } 220 | } 221 | 222 | // Checks pinging a successor, and getting a new successor 223 | func TestVnodeCheckNewSuccNewSucc(t *testing.T) { 224 | r := makeRing() 225 | sort.Sort(r) 226 | 227 | vn1 := r.vnodes[0] 228 | vn2 := r.vnodes[1] 229 | vn3 := r.vnodes[2] 230 | 231 | vn1.successors[0] = &vn3.Vnode 232 | vn2.predecessor = &vn1.Vnode 233 | vn3.predecessor = &vn2.Vnode 234 | 235 | // vn3 pred is vn2 236 | if pred, _ := vn3.GetPredecessor(); pred != &vn2.Vnode { 237 | t.Fatalf("expected vn2 as predecessor") 238 | } 239 | 240 | // Should not get an error 241 | if err := vn1.checkNewSuccessor(); err != nil { 242 | t.Fatalf("unexpected err %s", err) 243 | } 244 | 245 | // Should become vn2 246 | if vn1.successors[0] != &vn2.Vnode { 247 | t.Fatalf("unexpected successor! %s", vn1.successors[0]) 248 | } 249 | 250 | // 2nd successor should become vn3 251 | if vn1.successors[1] != &vn3.Vnode { 252 | t.Fatalf("unexpected 2nd successor!") 253 | } 254 | } 255 | 256 | // Checks pinging a successor, and getting a new successor 257 | // which is not alive 258 | func TestVnodeCheckNewSuccNewSuccDead(t *testing.T) { 259 | r := makeRing() 260 | sort.Sort(r) 261 | 262 | vn1 := r.vnodes[0] 263 | vn2 := r.vnodes[1] 264 | vn3 := r.vnodes[2] 265 | 266 | vn1.successors[0] = &vn3.Vnode 267 | vn2.predecessor = &vn1.Vnode 268 | vn3.predecessor = &vn2.Vnode 269 | 270 | // Remove vn2 271 | (r.transport.(*LocalTransport)).Deregister(&vn2.Vnode) 272 | 273 | // Should not get an error 274 | if err := vn1.checkNewSuccessor(); err != nil { 275 | t.Fatalf("unexpected err %s", err) 276 | } 277 | 278 | // Should stay vn3 279 | if vn1.successors[0] != &vn3.Vnode { 280 | t.Fatalf("unexpected successor!") 281 | } 282 | } 283 | 284 | // Test notifying a successor successfully 285 | func TestVnodeNotifySucc(t *testing.T) { 286 | 
r := makeRing() 287 | sort.Sort(r) 288 | 289 | s1 := &Vnode{Id: []byte{1}} 290 | s2 := &Vnode{Id: []byte{2}} 291 | s3 := &Vnode{Id: []byte{3}} 292 | 293 | vn1 := r.vnodes[0] 294 | vn2 := r.vnodes[1] 295 | vn1.successors[0] = &vn2.Vnode 296 | vn2.predecessor = &vn1.Vnode 297 | vn2.successors[0] = s1 298 | vn2.successors[1] = s2 299 | vn2.successors[2] = s3 300 | 301 | // Should get no error 302 | if err := vn1.notifySuccessor(); err != nil { 303 | t.Fatalf("unexpected err %s", err) 304 | } 305 | 306 | // Successor list should be updated 307 | if vn1.successors[1] != s1 { 308 | t.Fatalf("bad succ 1") 309 | } 310 | if vn1.successors[2] != s2 { 311 | t.Fatalf("bad succ 2") 312 | } 313 | if vn1.successors[3] != s3 { 314 | t.Fatalf("bad succ 3") 315 | } 316 | 317 | // Predecessor should not updated 318 | if vn2.predecessor != &vn1.Vnode { 319 | t.Fatalf("bad predecessor") 320 | } 321 | } 322 | 323 | // Test notifying a dead successor 324 | func TestVnodeNotifySuccDead(t *testing.T) { 325 | r := makeRing() 326 | sort.Sort(r) 327 | 328 | vn1 := r.vnodes[0] 329 | vn2 := r.vnodes[1] 330 | vn1.successors[0] = &vn2.Vnode 331 | vn2.predecessor = &vn1.Vnode 332 | 333 | // Remove vn2 334 | (r.transport.(*LocalTransport)).Deregister(&vn2.Vnode) 335 | 336 | // Should get error 337 | if err := vn1.notifySuccessor(); err == nil { 338 | t.Fatalf("expected err!") 339 | } 340 | } 341 | 342 | func TestVnodeNotifySamePred(t *testing.T) { 343 | r := makeRing() 344 | sort.Sort(r) 345 | 346 | s1 := &Vnode{Id: []byte{1}} 347 | s2 := &Vnode{Id: []byte{2}} 348 | s3 := &Vnode{Id: []byte{3}} 349 | 350 | vn1 := r.vnodes[0] 351 | vn2 := r.vnodes[1] 352 | vn1.successors[0] = &vn2.Vnode 353 | vn2.predecessor = &vn1.Vnode 354 | vn2.successors[0] = s1 355 | vn2.successors[1] = s2 356 | vn2.successors[2] = s3 357 | 358 | succs, err := vn2.Notify(&vn1.Vnode) 359 | if err != nil { 360 | t.Fatalf("unexpected error! 
%s", err) 361 | } 362 | if succs[0] != s1 { 363 | t.Fatalf("unexpected succ 0") 364 | } 365 | if succs[1] != s2 { 366 | t.Fatalf("unexpected succ 1") 367 | } 368 | if succs[2] != s3 { 369 | t.Fatalf("unexpected succ 2") 370 | } 371 | if vn2.predecessor != &vn1.Vnode { 372 | t.Fatalf("unexpected pred") 373 | } 374 | } 375 | 376 | func TestVnodeNotifyNoPred(t *testing.T) { 377 | r := makeRing() 378 | sort.Sort(r) 379 | 380 | s1 := &Vnode{Id: []byte{1}} 381 | s2 := &Vnode{Id: []byte{2}} 382 | s3 := &Vnode{Id: []byte{3}} 383 | 384 | vn1 := r.vnodes[0] 385 | vn2 := r.vnodes[1] 386 | vn2.successors[0] = s1 387 | vn2.successors[1] = s2 388 | vn2.successors[2] = s3 389 | 390 | succs, err := vn2.Notify(&vn1.Vnode) 391 | if err != nil { 392 | t.Fatalf("unexpected error! %s", err) 393 | } 394 | if succs[0] != s1 { 395 | t.Fatalf("unexpected succ 0") 396 | } 397 | if succs[1] != s2 { 398 | t.Fatalf("unexpected succ 1") 399 | } 400 | if succs[2] != s3 { 401 | t.Fatalf("unexpected succ 2") 402 | } 403 | if vn2.predecessor != &vn1.Vnode { 404 | t.Fatalf("unexpected pred") 405 | } 406 | } 407 | 408 | func TestVnodeNotifyNewPred(t *testing.T) { 409 | r := makeRing() 410 | sort.Sort(r) 411 | 412 | vn1 := r.vnodes[0] 413 | vn2 := r.vnodes[1] 414 | vn3 := r.vnodes[2] 415 | vn3.predecessor = &vn1.Vnode 416 | 417 | _, err := vn3.Notify(&vn2.Vnode) 418 | if err != nil { 419 | t.Fatalf("unexpected error! 
%s", err) 420 | } 421 | if vn3.predecessor != &vn2.Vnode { 422 | t.Fatalf("unexpected pred") 423 | } 424 | } 425 | 426 | func TestVnodeFixFinger(t *testing.T) { 427 | r := makeRing() 428 | sort.Sort(r) 429 | num := len(r.vnodes) 430 | for i := 0; i < num; i++ { 431 | r.vnodes[i].init(i) 432 | r.vnodes[i].successors[0] = &r.vnodes[(i+1)%num].Vnode 433 | } 434 | 435 | // Fix finger should not error 436 | vn := r.vnodes[0] 437 | if err := vn.fixFingerTable(); err != nil { 438 | t.Fatalf("unexpected err, %s", err) 439 | } 440 | 441 | // Check we've progressed 442 | if vn.last_finger != 158 { 443 | t.Fatalf("unexpected last finger! %d", vn.last_finger) 444 | } 445 | 446 | // Ensure that we've setup our successor as the initial entries 447 | for i := 0; i < vn.last_finger; i++ { 448 | if vn.finger[i] != vn.successors[0] { 449 | t.Fatalf("unexpected finger entry!") 450 | } 451 | } 452 | 453 | // Fix next index 454 | if err := vn.fixFingerTable(); err != nil { 455 | t.Fatalf("unexpected err, %s", err) 456 | } 457 | if vn.last_finger != 0 { 458 | t.Fatalf("unexpected last finger! %d", vn.last_finger) 459 | } 460 | } 461 | 462 | func TestVnodeCheckPredNoPred(t *testing.T) { 463 | v := makeVnode() 464 | v.init(0) 465 | if err := v.checkPredecessor(); err != nil { 466 | t.Fatalf("unpexected err! %s", err) 467 | } 468 | } 469 | 470 | func TestVnodeCheckLivePred(t *testing.T) { 471 | r := makeRing() 472 | sort.Sort(r) 473 | 474 | vn1 := r.vnodes[0] 475 | vn2 := r.vnodes[1] 476 | vn2.predecessor = &vn1.Vnode 477 | 478 | if err := vn2.checkPredecessor(); err != nil { 479 | t.Fatalf("unexpected error! 
%s", err) 480 | } 481 | if vn2.predecessor != &vn1.Vnode { 482 | t.Fatalf("unexpected pred") 483 | } 484 | } 485 | 486 | func TestVnodeCheckDeadPred(t *testing.T) { 487 | r := makeRing() 488 | sort.Sort(r) 489 | 490 | vn1 := r.vnodes[0] 491 | vn2 := r.vnodes[1] 492 | vn2.predecessor = &vn1.Vnode 493 | 494 | // Deregister vn1 495 | (r.transport.(*LocalTransport)).Deregister(&vn1.Vnode) 496 | 497 | if err := vn2.checkPredecessor(); err != nil { 498 | t.Fatalf("unexpected error! %s", err) 499 | } 500 | if vn2.predecessor != nil { 501 | t.Fatalf("unexpected pred") 502 | } 503 | } 504 | 505 | func TestVnodeFindSuccessors(t *testing.T) { 506 | r := makeRing() 507 | sort.Sort(r) 508 | num := len(r.vnodes) 509 | for i := 0; i < num; i++ { 510 | r.vnodes[i].successors[0] = &r.vnodes[(i+1)%num].Vnode 511 | } 512 | 513 | // Get a random key 514 | h := r.config.HashFunc() 515 | h.Write([]byte("test")) 516 | key := h.Sum(nil) 517 | 518 | // Local only, should be nearest in the ring 519 | nearest := r.nearestVnode(key) 520 | exp := nearest.successors[0] 521 | 522 | // Do a lookup on the key 523 | for i := 0; i < len(r.vnodes); i++ { 524 | vn := r.vnodes[i] 525 | succ, err := vn.FindSuccessors(1, key) 526 | if err != nil { 527 | t.Fatalf("unexpected err! %s", err) 528 | } 529 | 530 | // Local only, should be nearest in the ring 531 | if exp != succ[0] { 532 | t.Fatalf("unexpected succ! 
K:%x Exp: %s Got:%s", 533 | key, exp, succ[0]) 534 | } 535 | } 536 | } 537 | 538 | // Ensure each node has multiple successors 539 | func TestVnodeFindSuccessorsMultSucc(t *testing.T) { 540 | r := makeRing() 541 | sort.Sort(r) 542 | num := len(r.vnodes) 543 | for i := 0; i < num; i++ { 544 | r.vnodes[i].successors[0] = &r.vnodes[(i+1)%num].Vnode 545 | r.vnodes[i].successors[1] = &r.vnodes[(i+2)%num].Vnode 546 | r.vnodes[i].successors[2] = &r.vnodes[(i+3)%num].Vnode 547 | } 548 | 549 | // Get a random key 550 | h := r.config.HashFunc() 551 | h.Write([]byte("test")) 552 | key := h.Sum(nil) 553 | 554 | // Local only, should be nearest in the ring 555 | nearest := r.nearestVnode(key) 556 | exp := nearest.successors[0] 557 | 558 | // Do a lookup on the key 559 | for i := 0; i < len(r.vnodes); i++ { 560 | vn := r.vnodes[i] 561 | succ, err := vn.FindSuccessors(1, key) 562 | if err != nil { 563 | t.Fatalf("unexpected err! %s", err) 564 | } 565 | 566 | // Local only, should be nearest in the ring 567 | if exp != succ[0] { 568 | t.Fatalf("unexpected succ! 
K:%x Exp: %s Got:%s", 569 | key, exp, succ[0]) 570 | } 571 | } 572 | } 573 | 574 | // Kill off a part of the ring and see what happens 575 | func TestVnodeFindSuccessorsSomeDead(t *testing.T) { 576 | r := makeRing() 577 | sort.Sort(r) 578 | num := len(r.vnodes) 579 | for i := 0; i < num; i++ { 580 | r.vnodes[i].successors[0] = &r.vnodes[(i+1)%num].Vnode 581 | r.vnodes[i].successors[1] = &r.vnodes[(i+2)%num].Vnode 582 | } 583 | 584 | // Kill 2 of the nodes 585 | (r.transport.(*LocalTransport)).Deregister(&r.vnodes[0].Vnode) 586 | (r.transport.(*LocalTransport)).Deregister(&r.vnodes[3].Vnode) 587 | 588 | // Get a random key 589 | h := r.config.HashFunc() 590 | h.Write([]byte("test")) 591 | key := h.Sum(nil) 592 | 593 | // Local only, should be nearest in the ring 594 | nearest := r.nearestVnode(key) 595 | exp := nearest.successors[0] 596 | 597 | // Do a lookup on the key 598 | for i := 0; i < len(r.vnodes); i++ { 599 | vn := r.vnodes[i] 600 | succ, err := vn.FindSuccessors(1, key) 601 | if err != nil { 602 | t.Fatalf("(%d) unexpected err! %s", i, err) 603 | } 604 | 605 | // Local only, should be nearest in the ring 606 | if exp != succ[0] { 607 | t.Fatalf("(%d) unexpected succ! 
K:%x Exp: %s Got:%s", 608 | i, key, exp, succ[0]) 609 | } 610 | } 611 | } 612 | 613 | func TestVnodeClearPred(t *testing.T) { 614 | v := makeVnode() 615 | v.init(0) 616 | p := &Vnode{Id: []byte{12}} 617 | v.predecessor = p 618 | v.ClearPredecessor(p) 619 | if v.predecessor != nil { 620 | t.Fatalf("expect no predecessor!") 621 | } 622 | 623 | np := &Vnode{Id: []byte{14}} 624 | v.predecessor = p 625 | v.ClearPredecessor(np) 626 | if v.predecessor != p { 627 | t.Fatalf("expect p predecessor!") 628 | } 629 | } 630 | 631 | func TestVnodeSkipSucc(t *testing.T) { 632 | v := makeVnode() 633 | v.init(0) 634 | 635 | s1 := &Vnode{Id: []byte{10}} 636 | s2 := &Vnode{Id: []byte{11}} 637 | s3 := &Vnode{Id: []byte{12}} 638 | 639 | v.successors[0] = s1 640 | v.successors[1] = s2 641 | v.successors[2] = s3 642 | 643 | // s2 should do nothing 644 | if err := v.SkipSuccessor(s2); err != nil { 645 | t.Fatalf("unexpected err") 646 | } 647 | if v.successors[0] != s1 { 648 | t.Fatalf("unexpected suc") 649 | } 650 | 651 | // s1 should skip 652 | if err := v.SkipSuccessor(s1); err != nil { 653 | t.Fatalf("unexpected err") 654 | } 655 | if v.successors[0] != s2 { 656 | t.Fatalf("unexpected suc") 657 | } 658 | if v.knownSuccessors() != 2 { 659 | t.Fatalf("bad num of suc") 660 | } 661 | } 662 | 663 | func TestVnodeLeave(t *testing.T) { 664 | r := makeRing() 665 | sort.Sort(r) 666 | num := len(r.vnodes) 667 | for i := int(0); i < num; i++ { 668 | r.vnodes[i].predecessor = &r.vnodes[(i+num-1)%num].Vnode 669 | r.vnodes[i].successors[0] = &r.vnodes[(i+1)%num].Vnode 670 | r.vnodes[i].successors[1] = &r.vnodes[(i+2)%num].Vnode 671 | } 672 | 673 | // Make node 0 leave 674 | if err := r.vnodes[0].leave(); err != nil { 675 | t.Fatalf("unexpected err") 676 | } 677 | 678 | if r.vnodes[4].successors[0] != &r.vnodes[1].Vnode { 679 | t.Fatalf("unexpected suc!") 680 | } 681 | if r.vnodes[1].predecessor != nil { 682 | t.Fatalf("unexpected pred!") 683 | } 684 | } 685 | 
--------------------------------------------------------------------------------