├── cluster.go ├── cluster_test.go ├── coral.go ├── coral_handler.go ├── coral_net.go ├── notif.go └── query.go /cluster.go: -------------------------------------------------------------------------------- 1 | package coral 2 | 3 | import ( 4 | "time" 5 | 6 | kb "github.com/libp2p/go-libp2p-kbucket" 7 | peer "github.com/libp2p/go-libp2p-peer" 8 | pstore "github.com/libp2p/go-libp2p-peerstore" 9 | ) 10 | 11 | type ClusterID string 12 | 13 | type Cluster struct { 14 | routingTable *kb.RoutingTable 15 | clusterID ClusterID 16 | level int 17 | } 18 | 19 | func NewCluster(bucketsize int, localID kb.ID, latency time.Duration, m pstore.Metrics, level int, clustID ClusterID) *Cluster { 20 | c := new(Cluster) 21 | c.routingTable = kb.NewRoutingTable(bucketsize, localID, latency, m) 22 | c.clusterID = clustID 23 | c.level = level 24 | return c 25 | } 26 | 27 | func (clust *Cluster) ClustSize() int { 28 | 29 | return clust.routingTable.Size() 30 | 31 | } 32 | 33 | //get the "destination" node for the given id in the level 2 routing table 34 | //closest in XOR distance 35 | //The type ID signifies that its contents have been hashed from either a 36 | //peer.ID or a util.Key. This unifies the keyspace 37 | func (clust *Cluster) getDestinationNode(key string) peer.ID { 38 | tgtID := kb.ConvertKey(key) 39 | return clust.routingTable.NearestPeer(tgtID) 40 | } 41 | 42 | //get the "next" node on way to the destination node 43 | func (clust *Cluster) getNextNode(key string, pID peer.ID) peer.ID { 44 | 45 | tgtID := kb.XORMidpoint(pID, key) 46 | return clust.routingTable.NearestPeer(tgtID) 47 | 48 | } 49 | -------------------------------------------------------------------------------- /cluster_test.go: -------------------------------------------------------------------------------- 1 | package coral 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | 9 | "testing" 10 | "time" 11 | 12 | opts "github.com/libp2p/go-libp2p-kad-dht/opts" 13 | 14 | cid "github.com/ipfs/go-cid" 15 | u "github.com/ipfs/go-ipfs-util" 16 | logging "github.com/ipfs/go-log" 17 | 18 | peer "github.com/libp2p/go-libp2p-peer" 19 | pstore "github.com/libp2p/go-libp2p-peerstore" 20 | 21 | swarmt "github.com/libp2p/go-libp2p-swarm/testing" 22 | bhost "github.com/libp2p/go-libp2p/p2p/host/basic" 23 | 24 | ma "github.com/multiformats/go-multiaddr" 25 | ) 26 | 27 | var testCaseValues = map[string][]byte{} 28 | var testCaseCids []*cid.Cid 29 | var log = logging.Logger("dht") 30 | 31 | func init() { 32 | for i := 0; i < 100; i++ { 33 | v := fmt.Sprintf("%d -- value", i) 34 | 35 | mhv := u.Hash([]byte(v)) 36 | testCaseCids = append(testCaseCids, cid.NewCidV0(mhv)) 37 | } 38 | } 39 | 40 | type blankValidator struct{} 41 | 42 | func (blankValidator) Validate(_ string, _ []byte) error { return nil } 43 | func (blankValidator) Select(_ string, _ [][]byte) (int, error) { return 0, nil } 44 | 45 | type testValidator struct{} 46 | 47 | func (testValidator) Select(_ string, bs [][]byte) (int, error) { 48 | index := -1 49 | for i, b := range bs { 50 | if bytes.Compare(b, []byte("newer")) == 0 { 51 | index = i 52 | } else if bytes.Compare(b, []byte("valid")) == 0 { 53 | if index == -1 { 54 | index = i 55 | } 56 | } 57 | } 58 | if index == -1 { 59 | return -1, errors.New("no rec found") 60 | } 61 | return index, nil 62 | } 63 | func (testValidator) Validate(_ string, b []byte) error { 64 | if bytes.Compare(b, []byte("expired")) == 0 { 65 | return errors.New("expired") 66 | } 67 | return nil 68 | } 69 | 70 | func setupDHT(ctx context.Context, 
t *testing.T, client bool) *coralNode { 71 | c, err := New( 72 | ctx, 73 | bhost.New(swarmt.GenSwarm(t, ctx, swarmt.OptDisableReuseport)), 74 | opts.Client(client), 75 | opts.NamespacedValidator("v", blankValidator{}), 76 | ) 77 | 78 | if err != nil { 79 | t.Fatal(err) 80 | } 81 | return c 82 | } 83 | 84 | func setupDHTS(ctx context.Context, n int, t *testing.T) ([]ma.Multiaddr, []peer.ID, []*coralNode) { 85 | addrs := make([]ma.Multiaddr, n) 86 | dhts := make([]*coralNode, n) 87 | peers := make([]peer.ID, n) 88 | 89 | sanityAddrsMap := make(map[string]struct{}) 90 | sanityPeersMap := make(map[string]struct{}) 91 | 92 | for i := 0; i < n; i++ { 93 | dhts[i] = setupDHT(ctx, t, false) 94 | peers[i] = dhts[i].id 95 | addrs[i] = dhts[i].peerstore.Addrs(dhts[i].id)[0] 96 | 97 | if _, lol := sanityAddrsMap[addrs[i].String()]; lol { 98 | t.Fatal("While setting up DHTs address got duplicated.") 99 | } else { 100 | sanityAddrsMap[addrs[i].String()] = struct{}{} 101 | } 102 | if _, lol := sanityPeersMap[peers[i].String()]; lol { 103 | t.Fatal("While setting up DHTs peerid got duplicated.") 104 | } else { 105 | sanityPeersMap[peers[i].String()] = struct{}{} 106 | } 107 | } 108 | 109 | return addrs, peers, dhts 110 | } 111 | 112 | func connectNoSync(t *testing.T, ctx context.Context, a, b *coralNode) { 113 | t.Helper() 114 | 115 | idB := b.id 116 | addrB := b.peerstore.Addrs(idB) 117 | //a.levelTwo.routingTable.Update(idB) 118 | if len(addrB) == 0 { 119 | t.Fatal("peers setup incorrectly: no local address") 120 | } 121 | // nn1 := (*netNotifiee)(a) 122 | // nn2 := (*netNotifiee)(b) 123 | 124 | a.peerstore.AddAddrs(idB, addrB, pstore.TempAddrTTL) 125 | pi := pstore.PeerInfo{ID: idB} 126 | if err := a.host.Connect(ctx, pi); err != nil { 127 | t.Fatal(err) 128 | } 129 | 130 | // c12 := a.host.Network().ConnsToPeer(b.id)[0] 131 | // c21 := b.host.Network().ConnsToPeer(a.id)[0] 132 | 133 | // Pretend to reestablish/re-kill connection 134 | // nn1.Connected(a.host.Network(), c12) 135 | // nn2.Connected(b.host.Network(), c21) 136 | 137 | } 138 | 139 | func wait(t *testing.T, ctx context.Context, a, b *coralNode) { 140 | t.Helper() 141 | 142 | // loop until connection notification has been received. 143 | // under high load, this may not happen as immediately as we would like. 
144 | for a.levelTwo.routingTable.Find(b.id) == "" { 145 | select { 146 | case <-ctx.Done(): 147 | t.Fatal(ctx.Err()) 148 | case <-time.After(time.Millisecond * 5): 149 | } 150 | } 151 | } 152 | 153 | func connect(t *testing.T, ctx context.Context, a, b *coralNode) { 154 | t.Helper() 155 | 156 | connectNoSync(t, ctx, a, b) 157 | wait(t, ctx, a, b) 158 | wait(t, ctx, b, a) 159 | } 160 | 161 | func TestValueGetSet(t *testing.T) { 162 | ctx, cancel := context.WithCancel(context.Background()) 163 | defer cancel() 164 | 165 | var dhts [10]*coralNode 166 | 167 | for i := range dhts { 168 | dhts[i] = setupDHT(ctx, t, false) 169 | defer dhts[i].Close() 170 | defer dhts[i].host.Close() 171 | } 172 | 173 | connect(t, ctx, dhts[0], dhts[1]) 174 | connect(t, ctx, dhts[0], dhts[2]) 175 | connect(t, ctx, dhts[0], dhts[3]) 176 | connect(t, ctx, dhts[0], dhts[4]) 177 | connect(t, ctx, dhts[1], dhts[2]) 178 | connect(t, ctx, dhts[1], dhts[3]) 179 | connect(t, ctx, dhts[1], dhts[4]) 180 | 181 | t.Log("adding value on: ", dhts[0].id) 182 | ctxT, cancel := context.WithTimeout(ctx, time.Second) 183 | defer cancel() 184 | err := dhts[0].PutValue(ctxT, "/v/hello", []byte("world")) 185 | if err != nil { 186 | t.Fatal(err) 187 | } 188 | 189 | t.Log("requesting value on dhts: ", dhts[1].id) 190 | ctxT, cancel = context.WithTimeout(ctx, time.Second*2) 191 | defer cancel() 192 | 193 | value, _ := dhts[1].GetValue(ctxT, "/v/hello") 194 | for val := range value { 195 | if err != nil { 196 | t.Fatal(err) 197 | } 198 | fmt.Printf("%s\n", string(val)) 199 | } 200 | //err = dhts[0].Value(ctxT, "/v/hello", []byte("world")) 201 | //edit this to be a channel 202 | // if string(val) != "world" { 203 | // t.Fatalf("Expected 'world' got '%s'", string(val)) 204 | /// } 205 | 206 | // // late connect 207 | // connect(t, ctx, dhts[2], dhts[0]) 208 | // connect(t, ctx, dhts[2], dhts[1]) 209 | 210 | // t.Log("requesting value (offline) on dhts: ", dhts[2].id) 211 | // vala, err := dhts[2].GetValue(ctxT, "/v/hello") 212 | // if vala != nil { 213 | // t.Fatalf("offline get should have failed, got %s", string(vala)) 214 | // } 215 | // if err != routing.ErrNotFound { 216 | // t.Fatalf("offline get should have failed with ErrNotFound, got: %s", err) 217 | // } 218 | // 219 | // t.Log("requesting value (online) on dhts: ", dhts[2].id) 220 | // val, err = dhts[2].GetValue(ctxT, "/v/hello") 221 | // if err != nil { 222 | // t.Fatal(err) 223 | // } 224 | // 225 | // if string(val) != "world" { 226 | // t.Fatalf("Expected 'world' got '%s'", string(val)) 227 | // } 228 | // 229 | // for _, d := range dhts[:3] { 230 | // connect(t, ctx, dhts[3], d) 231 | // } 232 | // connect(t, ctx, dhts[4], dhts[3]) 233 | // 234 | // t.Log("requesting value (requires peer routing) on dhts: ", dhts[4].id) 235 | // val, err = dhts[4].GetValue(ctxT, "/v/hello") 236 | // if err != nil { 237 | // t.Fatal(err) 238 | // } 239 | // 240 | // if string(val) != "world" { 241 | // t.Fatalf("Expected 'world' got '%s'", string(val)) 242 | // } 243 | } 244 | 245 | // 246 | // import ( 247 | // "testing" 248 | // 249 | // tu "github.com/libp2p/go-testutil" 250 | // ) 251 | // 252 | // func TestCluster(t *testing.T) { 253 | // pid := tu.RandPeerIDFatal(t) 254 | // n := NewCNode(pid) 255 | // 256 | // t.Logf("Cluster Two size: '%d'", n.levelTwoClustSize()) 257 | // t.Logf("Cluster One size: '%d'", n.levelOneClustSize()) 258 | // t.Logf("Cluster Zero size: '%d'", n.levelZeroClustSize()) 259 | // 260 | // pid = tu.RandPeerIDFatal(t) 261 | // n.addCNode(pid, "2") 262 | // pid = 
tu.RandPeerIDFatal(t) 263 | // n.addCNode(pid, "2") 264 | // t.Logf("Cluster Two size: '%d'", n.levelTwoClustSize()) 265 | // if n.levelTwoClustSize() != 2 { 266 | // t.Fatalf("Expected cluster size of 2, got %d ", n.levelTwoClustSize()) 267 | // } 268 | // cid := n.lookupClustID(pid) 269 | // t.Logf("Lookup returned '%s'", cid) 270 | // 271 | // n.joinCluster("345") 272 | // t.Logf("New ClusterID '%s'", n.levelTwo.clusterID) 273 | // t.Logf("Node id is %s", n.id) 274 | // pid = n.levelTwo.getNextNode("1", n.id) 275 | // t.Logf("Next node is '%s'", pid) 276 | // //n.insert("134", pid) 277 | // } 278 | -------------------------------------------------------------------------------- /coral.go: -------------------------------------------------------------------------------- 1 | package coral 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "math/big" 8 | "sync" 9 | "time" 10 | 11 | ds "github.com/ipfs/go-datastore" 12 | "github.com/jbenet/goprocess" 13 | goprocessctx "github.com/jbenet/goprocess/context" 14 | host "github.com/libp2p/go-libp2p-host" 15 | opts "github.com/libp2p/go-libp2p-kad-dht/opts" 16 | pb "github.com/libp2p/go-libp2p-kad-dht/pb" 17 | kb "github.com/libp2p/go-libp2p-kbucket" 18 | peer "github.com/libp2p/go-libp2p-peer" 19 | pstore "github.com/libp2p/go-libp2p-peerstore" 20 | protocol "github.com/libp2p/go-libp2p-protocol" 21 | record "github.com/libp2p/go-libp2p-record" 22 | recpb "github.com/libp2p/go-libp2p-record/pb" 23 | notif "github.com/libp2p/go-libp2p-routing/notifications" 24 | "github.com/whyrusleeping/base32" 25 | ) 26 | 27 | type coralNode struct { 28 | host host.Host // the network services we need 29 | id peer.ID // Local peer (yourself) 30 | ctx context.Context 31 | proc goprocess.Process 32 | datastore ds.Datastore // Local data 33 | 34 | peerstore pstore.Peerstore // Peer Registry 35 | Validator record.Validator 36 | // birth time.Time // When this peer started up 37 | plk sync.Mutex 38 | smlk sync.Mutex 39 | waitlk sync.Mutex 40 | peerToClust map[peer.ID]ClusterID //should be a triple, for now just implemented for level 2 41 | strmap map[peer.ID]*messageSender 42 | levelTwo *Cluster //peers are stored by their lowest common denominator 43 | levelOne *Cluster //cluster. for example, if a peer has the same level 2 44 | levelZero *Cluster //cluster id, it will be stored in the level 2 routing table. 45 | protocols []protocol.ID // cNode protocols 46 | } 47 | 48 | func New(ctx context.Context, h host.Host, options ...opts.Option) (*coralNode, error) { 49 | 50 | var cfg opts.Options 51 | if err := cfg.Apply(append([]opts.Option{opts.Defaults}, options...)...); err != nil { 52 | return nil, err 53 | } 54 | 55 | cfg.Protocols = []protocol.ID{protocol.ID("/ipfs/coral")} 56 | n := makeNode(ctx, h, cfg.Datastore, cfg.Protocols) 57 | // register for network notifs. 58 | n.host.Network().Notify((*netNotifiee)(n)) 59 | n.Validator = cfg.Validator 60 | n.proc = goprocessctx.WithContextAndTeardown(ctx, func() error { 61 | // remove ourselves from network notifs. 
62 | n.host.Network().StopNotify((*netNotifiee)(n)) 63 | return nil 64 | }) 65 | 66 | for _, p := range cfg.Protocols { 67 | //fmt.Printf("%s\n", p) 68 | h.SetStreamHandler(p, n.handleNewStream) 69 | } 70 | 71 | return n, nil 72 | } 73 | 74 | func makeNode(ctx context.Context, h host.Host, dstore ds.Batching, protocols []protocol.ID) *coralNode { 75 | n := new(coralNode) 76 | n.id = h.ID() 77 | n.peerstore = h.Peerstore() 78 | n.host = h 79 | n.datastore = dstore 80 | n.ctx = ctx 81 | m := pstore.NewMetrics() 82 | n.levelTwo = NewCluster(10, kb.ConvertPeerID(n.id), time.Hour, m, 2, "2") //going to want to join clusters immediately via discovery 83 | n.levelOne = NewCluster(10, kb.ConvertPeerID(n.id), time.Hour, m, 1, "1") 84 | n.levelZero = NewCluster(10, kb.ConvertPeerID(n.id), time.Hour, m, 0, "0") 85 | n.peerToClust = make(map[peer.ID]ClusterID) 86 | n.strmap = make(map[peer.ID]*messageSender) 87 | n.protocols = protocols 88 | return n 89 | } 90 | 91 | func (cNode *coralNode) protocolStrs() []string { 92 | pstrs := make([]string, len(cNode.protocols)) 93 | for idx, proto := range cNode.protocols { 94 | pstrs[idx] = string(proto) 95 | } 96 | 97 | return pstrs 98 | } 99 | 100 | func (cNode *coralNode) Update(ctx context.Context, p peer.ID) { 101 | cNode.levelTwo.routingTable.Update(p) 102 | } 103 | 104 | func (cNode *coralNode) PutValue(ctx context.Context, key string, value []byte) error { 105 | return cNode.insert(ctx, key, value) 106 | } 107 | 108 | func (cNode *coralNode) GetValue(ctx context.Context, key string) (<-chan []byte, <-chan error) { 109 | 110 | values := make(chan []byte) 111 | errc := make(chan error) 112 | go func() { 113 | defer close(values) 114 | //check local data store first, no point in searching other nodes if you already 115 | //have the info! 
116 | rec, err := cNode.getLocal(key) 117 | if err != nil { 118 | errc <- err 119 | } 120 | if rec != nil { 121 | value := rec.GetValue() 122 | values <- value 123 | 124 | } else { 125 | lastNodeReached, err := cNode.clusterSearch(ctx, cNode.id, values, 2, key) 126 | if err != nil { 127 | errc <- err 128 | } 129 | //level 1 cluster search picks up where the level 2 search left off 130 | lastNodeReached, err = cNode.clusterSearch(ctx, lastNodeReached, values, 1, key) 131 | if err != nil { 132 | errc <- err 133 | } 134 | //level 0 cluster search picks up where the level 1 search left off 135 | lastNodeReached, err = cNode.clusterSearch(ctx, lastNodeReached, values, 0, key) 136 | if err != nil { 137 | errc <- err 138 | } 139 | } 140 | 141 | }() 142 | 143 | return values, errc 144 | } 145 | 146 | func (cNode *coralNode) Close() error { 147 | return cNode.proc.Close() 148 | } 149 | 150 | func (cNode *coralNode) insert(ctx context.Context, key string, value []byte) error { 151 | //what is the total distance to the key 152 | totaldist := kb.Dist(cNode.id, key) 153 | dist := kb.Dist(cNode.id, key) 154 | //set epsilon to 1 155 | epsilon := big.NewInt(1) 156 | //create a stack of nodes to keep track of path to destination node 157 | var nodeStack []peer.ID 158 | //variable for the "midpoint key" or "fake key" generated each iteration 159 | nextKey := key 160 | var chosenNode peer.ID 161 | var err error 162 | //while distance is greater than epsilon 163 | for dist.Cmp(epsilon) > 0 { 164 | 165 | nextKey = kb.CalculateMidpointKey(nextKey, dist) //add dist to previous nextkey 166 | 167 | if dist.Cmp(totaldist) != 0 { //skipped on the first iteration, when no node has been chosen to query yet 168 | 169 | wg := sync.WaitGroup{} 170 | wg.Add(1) 171 | go func(fakeKey string, p peer.ID) { 172 | defer wg.Done() 173 | var next peer.ID 174 | next, err = cNode.findAndAddNextNode(ctx, fakeKey, p) 175 | notif.PublishQueryEvent(ctx, &notif.QueryEvent{ 176 | Type: notif.Value, 177 | ID: next, 178 | }) 179 | 180 | }(nextKey, chosenNode) 181 | 182 | wg.Wait() 183 | if err != nil { 184 | return err 185 | } 186 | } 187 | //after you have added the found node into your routing table, query the routing 188 | //table for the nearest peer to the key (in case you have a closer peer to this 189 | // midpoint key) 190 | chosenNode = cNode.levelTwo.routingTable.NearestPeer(kb.ConvertKey(nextKey)) 191 | nodeStack = append(nodeStack, chosenNode) 192 | dist = dist.Div(dist, big.NewInt(2)) 193 | 194 | } 195 | //put value to peer now 196 | rec := record.MakePutRecord(key, value) 197 | 198 | wg := sync.WaitGroup{} 199 | wg.Add(1) 200 | go func(stack []peer.ID, rec *recpb.Record, key string) { 201 | defer wg.Done() 202 | 203 | var target peer.ID 204 | 205 | target, err = cNode.putValueToPeer(ctx, stack, rec, key) 206 | 207 | notif.PublishQueryEvent(ctx, &notif.QueryEvent{ 208 | Type: notif.Value, 209 | ID: target, 210 | }) 211 | 212 | }(nodeStack, rec, key) 213 | 214 | wg.Wait() 215 | 216 | return err 217 | } 218 | 219 | func (cNode *coralNode) putValueToPeer(ctx context.Context, nodeStack []peer.ID, rec *recpb.Record, key string) (peer.ID, error) { 220 | fullAndLoaded := true 221 | var chosenNode peer.ID 222 | if len(nodeStack) == 0 { 223 | return chosenNode, errors.New("attempted to put value with empty node stack") 224 | } 225 | for fullAndLoaded { 226 | 227 | chosenNode = nodeStack[len(nodeStack)-1] 228 | pmes := pb.NewMessage(pb.Message_PUT_VALUE, []byte(key), 0) 229 | pmes.Record = rec 230 | _, err := cNode.sendRequest(ctx,
chosenNode, pmes) 231 | 232 | if err != nil { 233 | if err == ErrReadTimeout { 234 | fmt.Printf("read timeout: %s %s", chosenNode.Pretty(), key) 235 | } 236 | 237 | return chosenNode, err 238 | } 239 | 240 | fullAndLoaded = false 241 | } 242 | return chosenNode, nil 243 | } 244 | 245 | func (cNode *coralNode) getLocal(key string) (*recpb.Record, error) { 246 | //log.Debugf("getLocal %s", key) 247 | rec, err := cNode.getRecordFromDatastore(mkDsKey(key)) 248 | if err != nil { 249 | // log.Warningf("getLocal: %s", err) 250 | return nil, err 251 | } 252 | 253 | // Double check the key. Can't hurt. 254 | if rec != nil && string(rec.GetKey()) != key { 255 | // log.Errorf("BUG getLocal: found a DHT record that didn't match it's key: %s != %s", rec.GetKey(), key) 256 | return nil, nil 257 | 258 | } 259 | return rec, nil 260 | } 261 | func mkDsKey(s string) ds.Key { 262 | return ds.NewKey(base32.RawStdEncoding.EncodeToString([]byte(s))) 263 | } 264 | 265 | func (cNode *coralNode) nearestPeersToQuery(pmes *pb.Message, p peer.ID, count int, cluster int) []peer.ID { 266 | var closer []peer.ID 267 | closer = cNode.levelTwo.routingTable.NearestPeers(kb.ConvertKey(string(pmes.GetKey())), count) 268 | 269 | // no node? nil 270 | if closer == nil { 271 | fmt.Printf("betterPeersToQuery: no closer peers to send: %s", p) 272 | return nil 273 | } 274 | 275 | filtered := make([]peer.ID, 0, len(closer)) 276 | for _, clp := range closer { 277 | 278 | // == to self? thats bad 279 | if clp == cNode.id { 280 | fmt.Printf("BUG betterPeersToQuery: attempted to return self! this shouldn't happen...") 281 | return nil 282 | } 283 | // Dont send a peer back themselves 284 | if clp == p { 285 | continue 286 | } 287 | 288 | filtered = append(filtered, clp) 289 | } 290 | return filtered 291 | 292 | } 293 | 294 | func (cNode *coralNode) findAndAddNextNode(ctx context.Context, key string, receiverNode peer.ID) (peer.ID, error) { 295 | //fmt.Printf("Entered findandAddNextNode for key: %s, and recievernode: %s \n", key, receiverNode) 296 | 297 | pmes := pb.NewMessage(pb.Message_FIND_NODE, []byte(key), 2) 298 | resp, err := cNode.sendRequest(ctx, receiverNode, pmes) 299 | if err != nil { 300 | return peer.ID(""), err 301 | } 302 | 303 | closer := resp.GetCloserPeers() 304 | clpeers := pb.PBPeersToPeerInfos(closer) 305 | var nextNode peer.ID 306 | if len(clpeers) > 0 { 307 | nextNode = clpeers[0].ID 308 | cNode.peerstore.AddAddrs(clpeers[0].ID, clpeers[0].Addrs, 6*time.Hour) 309 | } else { 310 | nextNode = receiverNode 311 | } 312 | /// once you get the next node, add it to your routing table 313 | cNode.Update(ctx, nextNode) 314 | 315 | //fmt.Printf("Found node id: %s\n", nextNode.Pretty()) 316 | return nextNode, nil 317 | } 318 | 319 | //Cluster search searches a given cluster that a node belongs to for a key 320 | //Want to surface most local values first 321 | func (cNode *coralNode) clusterSearch(ctx context.Context, nodeid peer.ID, Value chan []byte, clusterLevel int, key string) (peer.ID, error) { 322 | totaldist := kb.Dist(nodeid, key) 323 | dist := kb.Dist(nodeid, key) 324 | goalkey := key 325 | var mostRecentSuccess peer.ID 326 | epsilon := big.NewInt(1) 327 | var chosenNode peer.ID 328 | nextKey := key 329 | for dist.Cmp(epsilon) > 0 { 330 | 331 | nextKey = kb.CalculateMidpointKey(nextKey, dist) //add dist to previous Nextkey 332 | if dist.Cmp(totaldist) != 0 { 333 | 334 | if chosenNode != mostRecentSuccess { 335 | value, err := cNode.findNextNodeAndVal(ctx, goalkey, nextKey, chosenNode, clusterLevel) 336 | if err != 
nil { 337 | return chosenNode, err 338 | } 339 | if value != nil { 340 | mostRecentSuccess = chosenNode 341 | Value <- value 342 | } 343 | } 344 | } 345 | //query routing table 346 | chosenNode = cNode.levelTwo.routingTable.NearestPeer(kb.ConvertKey(nextKey)) 347 | 348 | dist = dist.Div(dist, big.NewInt(2)) 349 | } 350 | return chosenNode, nil 351 | 352 | } 353 | 354 | func (cNode *coralNode) findNextNodeAndVal(ctx context.Context, goalkey string, fakekey string, nodeid peer.ID, clusterlevel int) ([]byte, error) { 355 | var value []byte 356 | wg := sync.WaitGroup{} 357 | var err error 358 | wg.Add(1) 359 | go func(key string, p peer.ID, clusterlevel int, err error) { 360 | ctx, _ := context.WithCancel(ctx) 361 | defer wg.Done() 362 | // fmt.Printf("request from %s\n", p) 363 | pmes := pb.NewMessage(pb.Message_GET_VALUE, []byte(goalkey), clusterlevel) 364 | resp, err := cNode.sendRequest(ctx, p, pmes) 365 | if err != nil { 366 | return 367 | } 368 | value = resp.GetRecord().GetValue() 369 | 370 | }(goalkey, nodeid, clusterlevel, err) 371 | 372 | wg.Wait() 373 | if err != nil { 374 | return nil, err 375 | } 376 | wg = sync.WaitGroup{} 377 | wg.Add(1) 378 | go func(key string, p peer.ID, clusterlevel int, err error) { 379 | ctx, _ := context.WithCancel(ctx) 380 | 381 | defer wg.Done() 382 | 383 | pmesnode := pb.NewMessage(pb.Message_FIND_NODE, []byte(fakekey), clusterlevel) 384 | resp, err := cNode.sendRequest(ctx, nodeid, pmesnode) 385 | if err != nil { 386 | return 387 | } 388 | closer := resp.GetCloserPeers() 389 | clpeers := pb.PBPeersToPeerInfos(closer) 390 | var nextNode peer.ID 391 | if len(clpeers) > 0 { 392 | nextNode = clpeers[0].ID 393 | cNode.peerstore.AddAddrs(clpeers[0].ID, clpeers[0].Addrs, 6*time.Hour) 394 | } else { 395 | nextNode = nodeid 396 | } 397 | //once you get the next node, add it to your routing table 398 | cNode.Update(ctx, nextNode) 399 | }(fakekey, nodeid, clusterlevel, err) 400 | 401 | wg.Wait() 402 | 403 | if value == nil { 404 | return nil, nil 405 | } else { 406 | return value, nil 407 | } 408 | } 409 | 410 | //local node joins cluster 411 | func (cNode *coralNode) joinCluster(clustID ClusterID) ClusterID { 412 | 413 | cNode.levelTwo.clusterID = clustID 414 | return clustID 415 | 416 | } 417 | 418 | func (cNode *coralNode) levelTwoClustSize() int { 419 | 420 | return cNode.levelTwo.ClustSize() 421 | 422 | } 423 | func (cNode *coralNode) levelZeroClustSize() int { 424 | 425 | return cNode.levelZero.ClustSize() 426 | 427 | } 428 | 429 | func (cNode *coralNode) levelOneClustSize() int { 430 | 431 | return cNode.levelOne.ClustSize() 432 | 433 | } 434 | 435 | //peer cluster info is kept track of in a map 436 | //peer id info is kept track of in routing table 437 | func (cNode *coralNode) addCNode(p peer.ID, clust int) { 438 | if clust == 2 { 439 | cNode.levelTwo.routingTable.Update(p) //change 440 | cNode.peerToClust[p] = cNode.levelTwo.clusterID 441 | } else if clust == 1 { 442 | cNode.levelOne.routingTable.Update(p) 443 | cNode.peerToClust[p] = cNode.levelOne.clusterID 444 | } else { 445 | cNode.levelZero.routingTable.Update(p) 446 | cNode.peerToClust[p] = cNode.levelZero.clusterID 447 | } 448 | 449 | //update peer store? 
450 | } 451 | 452 | //lookup a peer's cluster info 453 | //for now just implemented for level 2 only 454 | func (cNode *coralNode) lookupClustID(p peer.ID) ClusterID { 455 | return cNode.peerToClust[p] 456 | } 457 | 458 | func (cNode *coralNode) sortAllNodes() { 459 | peers := cNode.levelTwo.routingTable.ListPeers() 460 | 461 | for _, p := range peers { 462 | cNode.sortNode(p) 463 | } 464 | } 465 | func (cNode *coralNode) sortNode(p peer.ID) { 466 | 467 | //timeOne := time.Now() 468 | wg := sync.WaitGroup{} 469 | wg.Add(1) 470 | go func(p peer.ID) { 471 | ctx, cancel := context.WithCancel(cNode.ctx) 472 | defer cancel() 473 | defer wg.Done() 474 | pmes := pb.NewMessage(pb.Message_PING, nil, 0) 475 | _, err := cNode.sendRequest(ctx, p, pmes) 476 | if err != nil { 477 | } 478 | 479 | }(p) 480 | wg.Wait() 481 | //rTT := time.Now().Sub(timeOne) 482 | //seconds := rTT.Seconds() 483 | //if seconds >= 80000 { 484 | // cNode.levelZero.routingTable.Update(p) 485 | //level 0 486 | // } else if seconds >= 20000 { 487 | // cNode.levelOne.routingTable.Update(p) 488 | //level 1 489 | // } else { 490 | cNode.levelTwo.routingTable.Update(p) 491 | // level 2 492 | } 493 | -------------------------------------------------------------------------------- /coral_handler.go: -------------------------------------------------------------------------------- 1 | package coral 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "time" 9 | 10 | proto "github.com/gogo/protobuf/proto" 11 | ds "github.com/ipfs/go-datastore" 12 | u "github.com/ipfs/go-ipfs-util" 13 | pb "github.com/libp2p/go-libp2p-kad-dht/pb" 14 | peer "github.com/libp2p/go-libp2p-peer" 15 | pstore "github.com/libp2p/go-libp2p-peerstore" 16 | recpb "github.com/libp2p/go-libp2p-record/pb" 17 | base32 "github.com/whyrusleeping/base32" 18 | //inet "github.com/libp2p/go-libp2p-net" 19 | ) 20 | 21 | type cNodeHandler func(context.Context, peer.ID, *pb.Message) (*pb.Message, error) 22 | 23 | func (cNode *coralNode) handlerForMsgType(t pb.Message_MessageType) cNodeHandler { 24 | switch t { 25 | case pb.Message_GET_VALUE: //gets value 26 | return cNode.handleGetValue 27 | case pb.Message_PUT_VALUE: //puts value 28 | return cNode.handlePutValue 29 | case pb.Message_FIND_NODE: //returns closer peers 30 | return cNode.handleFindPeer 31 | // case pb.Message_ADD_PROVIDER: 32 | // return cNode.handleAddProvider 33 | // case pb.Message_GET_PROVIDERS: 34 | // return cNode.handleGetProviders 35 | case pb.Message_PING: 36 | return cNode.handlePing 37 | default: 38 | return nil 39 | } 40 | } 41 | func cleanRecord(rec *recpb.Record) { 42 | rec.XXX_unrecognized = nil 43 | rec.TimeReceived = "" 44 | } 45 | 46 | func convertToDsKey(s string) ds.Key { 47 | return ds.NewKey(base32.RawStdEncoding.EncodeToString([]byte(s))) 48 | } 49 | 50 | func (cNode *coralNode) handleGetValue(ctx context.Context, p peer.ID, pmes *pb.Message) (_ *pb.Message, err error) { 51 | 52 | resp := pb.NewMessage(pmes.GetType(), pmes.GetKey(), pmes.GetClusterLevel()) 53 | 54 | // first, is there even a key? 55 | k := pmes.GetKey() 56 | if len(k) == 0 { 57 | return nil, errors.New("handleGetValue but no key was provided") 58 | // TODO: send back an error response? could be bad, but the other node's hanging. 
59 | } 60 | 61 | rec, err := cNode.checkLocalDatastore(k) 62 | if err != nil { 63 | fmt.Printf("error") 64 | return nil, err 65 | } 66 | resp.Record = rec 67 | return resp, nil 68 | 69 | } 70 | 71 | func (cNode *coralNode) checkLocalDatastore(k []byte) (*recpb.Record, error) { 72 | // log.Debugf("%s handleGetValue looking into ds", dht.self) 73 | dskey := convertToDsKey(string(k)) 74 | buf, err := cNode.datastore.Get(dskey) 75 | //log.Debugf("%s handleGetValue looking into ds GOT %v", dht.self, buf) 76 | 77 | if err == ds.ErrNotFound { 78 | return nil, nil 79 | } 80 | 81 | // if we got an unexpected error, bail. 82 | if err != nil { 83 | return nil, err 84 | } 85 | 86 | // if we have the value, send it back 87 | //log.Debugf("%s handleGetValue success!", dht.self) 88 | 89 | rec := new(recpb.Record) 90 | s := buf.([]byte) 91 | err = proto.Unmarshal(s, rec) 92 | if err != nil { 93 | //fmt.Printf("failed to unmarshal DHT record from datastore") 94 | return nil, err 95 | } 96 | 97 | var recordIsBad bool 98 | _, err = u.ParseRFC3339(rec.GetTimeReceived()) 99 | if err != nil { 100 | //fmt.Printf("either no receive time set on record, or it was invalid: %s", err) 101 | recordIsBad = true 102 | } 103 | 104 | // if time.Now().Sub(recvtime) > MaxRecordAge { 105 | // // log.Debug("old record found, tossing.") 106 | // recordIsBad = true 107 | // } 108 | 109 | // NOTE: We do not verify the record here beyond checking these timestamps. 110 | // we put the burden of checking the records on the requester as checking a record 111 | // may be computationally expensive 112 | 113 | if recordIsBad { 114 | err := cNode.datastore.Delete(dskey) 115 | if err != nil { 116 | //fmt.Printf("Failed to delete bad record from datastore: %s", err) 117 | } 118 | 119 | return nil, nil // can treat this as not having the record at all 120 | } 121 | 122 | return rec, nil 123 | } 124 | 125 | func (cNode *coralNode) handlePutValue(ctx context.Context, p peer.ID, pmes *pb.Message) (_ *pb.Message, err error) { 126 | resp := pb.NewMessage(pmes.GetType(), nil, pmes.GetClusterLevel()) 127 | rec := pmes.GetRecord() 128 | if rec == nil { 129 | //log.Infof("Got nil record from: %s", p.Pretty()) 130 | return nil, errors.New("nil record") 131 | } 132 | 133 | if !bytes.Equal([]byte(pmes.GetKey()), rec.GetKey()) { 134 | return nil, errors.New("put key doesn't match record key") 135 | } 136 | 137 | cleanRecord(rec) 138 | 139 | // Make sure the record is valid (not expired, valid signature etc) 140 | if err = cNode.Validator.Validate(string(rec.GetKey()), rec.GetValue()); err != nil { 141 | //log.Warningf("Bad dht record in PUT from: %s. %s", p.Pretty(), err) 142 | return nil, err 143 | } 144 | 145 | dskey := convertToDsKey(string(rec.GetKey())) 146 | //TODO: add this check back in 147 | // Make sure the new record is "better" than the record we have locally. 148 | // This prevents a record with for example a lower sequence number from 149 | // overwriting a record with a higher sequence number. 150 | // existing, err := cNode.getRecordFromDatastore(dskey) 151 | // if err != nil { 152 | // return nil, err 153 | // } 154 | // 155 | // if existing != nil { 156 | // recs := [][]byte{rec.GetValue(), existing.GetValue()} 157 | // i, err := cNode.Validator.Select(rec.GetKey(), recs) 158 | // if err != nil { 159 | // //log.Warningf("Bad dht record in PUT from %s: %s", p.Pretty(), err) 160 | // return nil, err 161 | // } 162 | // if i != 0 { 163 | // //log.Infof("DHT record in PUT from %s is older than existing record. 
Ignoring", p.Pretty()) 164 | // return nil, errors.New("old record") 165 | // } 166 | // } 167 | 168 | // record the time we receive every record 169 | rec.TimeReceived = u.FormatRFC3339(time.Now()) 170 | 171 | data, err := proto.Marshal(rec) 172 | if err != nil { 173 | return nil, err 174 | } 175 | 176 | err = cNode.datastore.Put(dskey, data) 177 | //log.Debugf("%s handlePutValue %v", dht.self, dskey) 178 | return resp, err 179 | } 180 | 181 | func (cNode *coralNode) handleFindPeer(ctx context.Context, p peer.ID, pmes *pb.Message) (_ *pb.Message, err error) { 182 | 183 | resp := pb.NewMessage(pmes.GetType(), nil, pmes.GetClusterLevel()) 184 | var nearest []peer.ID 185 | 186 | nearest = cNode.nearestPeersToQuery(pmes, p, 2, pmes.GetClusterLevel()) 187 | 188 | nearestinfos := pstore.PeerInfos(cNode.peerstore, nearest) 189 | // possibly an over-allocation but this array is temporary anyways. 190 | withAddresses := make([]pstore.PeerInfo, 0, len(nearestinfos)) 191 | for _, pi := range nearestinfos { 192 | 193 | if len(pi.Addrs) > 0 { 194 | withAddresses = append(withAddresses, pi) 195 | 196 | //fmt.Printf("handleFindPeer: sending back '%s' %s\n", pi.ID, pi.Addrs) 197 | } 198 | } 199 | 200 | resp.CloserPeers = pb.PeerInfosToPBPeers(cNode.host.Network(), withAddresses) 201 | //fmt.Printf("Response closer peers %s\n", resp.CloserPeers) 202 | return resp, nil 203 | } 204 | 205 | func (cNode *coralNode) getRecordFromDatastore(dskey ds.Key) (*recpb.Record, error) { 206 | buf, err := cNode.datastore.Get(dskey) 207 | if err == ds.ErrNotFound { 208 | return nil, nil 209 | } 210 | if err != nil { 211 | //log.Errorf("Got error retrieving record with key %s from datastore: %s", dskey, err) 212 | return nil, err 213 | } 214 | rec := new(recpb.Record) 215 | s := buf.([]byte) 216 | err = proto.Unmarshal(s, rec) 217 | if err != nil { 218 | // Bad data in datastore, log it but don't return an error, we'll just overwrite it 219 | //log.Errorf("Bad record data stored in datastore with key %s: could not unmarshal record", dskey) 220 | return nil, nil 221 | } 222 | 223 | err = cNode.Validator.Validate(string(rec.GetKey()), rec.GetValue()) 224 | if err != nil { 225 | // Invalid record in datastore, probably expired but don't return an error, 226 | // we'll just overwrite it 227 | //log.Debugf("Local record verify failed: %s (discarded)", err) 228 | return nil, nil 229 | } 230 | 231 | return rec, nil 232 | } 233 | 234 | func (cNode *coralNode) handlePing(_ context.Context, p peer.ID, pmes *pb.Message) (*pb.Message, error) { 235 | //fmt.Printf("here") 236 | resp := pb.NewMessage(pmes.GetType(), nil, pmes.GetClusterLevel()) 237 | return resp, nil 238 | } 239 | -------------------------------------------------------------------------------- /coral_net.go: -------------------------------------------------------------------------------- 1 | package coral 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "sync" 8 | "time" 9 | 10 | ggio "github.com/gogo/protobuf/io" 11 | ctxio "github.com/jbenet/go-context/io" 12 | pb "github.com/libp2p/go-libp2p-kad-dht/pb" 13 | inet "github.com/libp2p/go-libp2p-net" 14 | peer "github.com/libp2p/go-libp2p-peer" 15 | ) 16 | 17 | var cNodeReadMessageTimeout = time.Minute 18 | var ErrReadTimeout = fmt.Errorf("timed out reading response") 19 | var waitlock sync.Mutex 20 | 21 | // handleNewStream implements the inet.StreamHandler 22 | func (cNode *coralNode) handleNewStream(s inet.Stream) { 23 | go cNode.handleNewMessage(s) 24 | } 25 | 26 | func (cNode *coralNode) handleNewMessage(s 
inet.Stream) { 27 | 28 | ctx := cNode.ctx 29 | cr := ctxio.NewReader(ctx, s) // ok to use. we defer close stream in this func 30 | cw := ctxio.NewWriter(ctx, s) // ok to use. we defer close stream in this func 31 | r := ggio.NewDelimitedReader(cr, inet.MessageSizeMax) 32 | w := ggio.NewDelimitedWriter(cw) 33 | mPeer := s.Conn().RemotePeer() 34 | 35 | for { 36 | // receive msg 37 | 38 | pmes := new(pb.Message) 39 | switch err := r.ReadMsg(pmes); err { 40 | case io.EOF: 41 | s.Close() 42 | return 43 | case nil: 44 | // fmt.Printf("no error") 45 | default: 46 | s.Reset() 47 | //fmt.Printf("Error unmarshaling data: %s\n", err) 48 | //fmt.Printf("%s\n", pmes.GetType()) 49 | return 50 | } 51 | 52 | // update the peer (on valid msgs only) 53 | cNode.updateFromMessage(ctx, mPeer, pmes) 54 | 55 | // get handler for this msg type. 56 | handler := cNode.handlerForMsgType(pmes.GetType()) 57 | //fmt.Printf("handle : %s\n", pmes.GetType()) 58 | if handler == nil { 59 | s.Reset() 60 | //fmt.Printf("got back nil handler from handlerForMsgType") 61 | return 62 | } 63 | 64 | // dispatch handler. 65 | rpmes, err := handler(ctx, mPeer, pmes) 66 | if err != nil { 67 | s.Reset() 68 | fmt.Printf("handle message error: %s", err) 69 | return 70 | } 71 | 72 | // if nil response, return it before serializing 73 | if rpmes == nil { 74 | fmt.Printf("got back nil response from request") 75 | continue 76 | } 77 | 78 | // send out response msg 79 | if err := w.WriteMsg(rpmes); err != nil { 80 | s.Reset() 81 | fmt.Printf("send response error: %s", err) 82 | return 83 | } 84 | } 85 | } 86 | 87 | // sendRequest sends out a request, but also makes sure to 88 | // measure the RTT for latency measurements. 89 | func (cNode *coralNode) sendRequest(ctx context.Context, p peer.ID, pmes *pb.Message) (*pb.Message, error) { 90 | 91 | ms, err := cNode.messageSenderForPeer(p) 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | start := time.Now() 97 | 98 | rpmes, err := ms.SendRequest(ctx, pmes) 99 | if err != nil { 100 | return nil, err 101 | } 102 | 103 | // update the peer (on valid msgs only) 104 | cNode.updateFromMessage(ctx, p, rpmes) 105 | 106 | cNode.peerstore.RecordLatency(p, time.Since(start)) 107 | //log.Event(ctx, "cNode ReceivedMessage", cNode .self, p, rpmes) 108 | return rpmes, nil 109 | } 110 | 111 | // sendMessage sends out a message 112 | func (cNode *coralNode) sendMessage(ctx context.Context, p peer.ID, pmes *pb.Message) error { 113 | ms, err := cNode.messageSenderForPeer(p) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | if err := ms.SendMessage(ctx, pmes); err != nil { 119 | return err 120 | } 121 | //log.Event(ctx, "cNode SentMessage", cNode .self, p, pmes) 122 | return nil 123 | } 124 | 125 | func (cNode *coralNode) updateFromMessage(ctx context.Context, p peer.ID, mes *pb.Message) error { 126 | // Make sure that this node is actually a cNode server, not just a client. 127 | protos, err := cNode.peerstore.SupportsProtocols(p, cNode.protocolStrs()...) 
128 | if err == nil && len(protos) > 0 { 129 | cNode.Update(ctx, p) 130 | } 131 | return nil 132 | } 133 | 134 | func (cNode *coralNode) messageSenderForPeer(p peer.ID) (*messageSender, error) { 135 | cNode.smlk.Lock() 136 | ms, ok := cNode.strmap[p] 137 | if ok { 138 | cNode.smlk.Unlock() 139 | return ms, nil 140 | } 141 | ms = &messageSender{p: p, cNode: cNode} 142 | cNode.strmap[p] = ms 143 | cNode.smlk.Unlock() 144 | 145 | if err := ms.prepOrInvalidate(); err != nil { 146 | cNode.smlk.Lock() 147 | defer cNode.smlk.Unlock() 148 | 149 | msCur := cNode.strmap[p] 150 | // Changed. Use the new one, old one is invalid and 151 | // not in the map so we can just throw it away. 152 | if ms != msCur { 153 | return msCur, nil 154 | } 155 | // Not changed, remove the now invalid stream from the 156 | // map. 157 | delete(cNode.strmap, p) 158 | return nil, err 159 | } 160 | // Invalid but not in map. Must have been removed by a disconnect. 161 | 162 | // All ready to go. 163 | return ms, nil 164 | } 165 | 166 | type messageSender struct { 167 | s inet.Stream 168 | r ggio.ReadCloser 169 | w ggio.WriteCloser 170 | lk sync.Mutex 171 | p peer.ID 172 | cNode *coralNode 173 | 174 | invalid bool 175 | singleMes int 176 | } 177 | 178 | // invalidate is called before this messageSender is removed from the strmap. 179 | // It prevents the messageSender from being reused/reinitialized and then 180 | // forgotten (leaving the stream open). 181 | func (ms *messageSender) invalidate() { 182 | ms.invalid = true 183 | if ms.s != nil { 184 | //fmt.Printf("INVALIDATE") 185 | ms.s.Reset() 186 | ms.s = nil 187 | } 188 | } 189 | 190 | func (ms *messageSender) prepOrInvalidate() error { 191 | ms.lk.Lock() 192 | defer ms.lk.Unlock() 193 | if err := ms.prep(); err != nil { 194 | ms.invalidate() 195 | return err 196 | } 197 | return nil 198 | } 199 | 200 | func (ms *messageSender) prep() error { 201 | if ms.invalid { 202 | return fmt.Errorf("message sender has been invalidated") 203 | } 204 | if ms.s != nil { 205 | return nil 206 | } 207 | 208 | nstr, err := ms.cNode.host.NewStream(ms.cNode.ctx, ms.p, ms.cNode.protocols...) 209 | if err != nil { 210 | return err 211 | } 212 | 213 | ms.r = ggio.NewDelimitedReader(nstr, inet.MessageSizeMax) 214 | ms.w = ggio.NewDelimitedWriter(nstr) 215 | ms.s = nstr 216 | 217 | return nil 218 | } 219 | 220 | // streamReuseTries is the number of times we will try to reuse a stream to a 221 | // given peer before giving up and reverting to the old one-message-per-stream 222 | // behaviour. 
223 | const streamReuseTries = 3 224 | 225 | func (ms *messageSender) SendMessage(ctx context.Context, pmes *pb.Message) error { 226 | ms.lk.Lock() 227 | defer ms.lk.Unlock() 228 | retry := false 229 | for { 230 | if err := ms.prep(); err != nil { 231 | return err 232 | } 233 | 234 | if err := ms.w.WriteMsg(pmes); err != nil { 235 | ms.s.Reset() 236 | ms.s = nil 237 | 238 | if retry { 239 | // log.Info("error writing message, bailing: ", err) 240 | return err 241 | } else { 242 | // log.Info("error writing message, trying again: ", err) 243 | retry = true 244 | continue 245 | } 246 | } 247 | 248 | //log.Event(ctx, "cNode SentMessage", ms.cNode .self, ms.p, pmes) 249 | 250 | if ms.singleMes > streamReuseTries { 251 | go inet.FullClose(ms.s) 252 | ms.s = nil 253 | } else if retry { 254 | ms.singleMes++ 255 | } 256 | 257 | return nil 258 | } 259 | } 260 | 261 | func (ms *messageSender) SendRequest(ctx context.Context, pmes *pb.Message) (*pb.Message, error) { 262 | ms.lk.Lock() 263 | defer ms.lk.Unlock() 264 | retry := false 265 | for { 266 | if err := ms.prep(); err != nil { 267 | return nil, err 268 | } 269 | 270 | if err := ms.w.WriteMsg(pmes); err != nil { 271 | ms.s.Reset() 272 | ms.s = nil 273 | 274 | if retry { 275 | //log.Info("error writing message, bailing: ", err) 276 | return nil, err 277 | } else { 278 | // log.Info("error writing message, trying again: ", err) 279 | retry = true 280 | continue 281 | } 282 | } 283 | 284 | mes := new(pb.Message) 285 | if err := ms.ctxReadMsg(ctx, mes); err != nil { 286 | ms.s.Reset() 287 | ms.s = nil 288 | 289 | if retry { 290 | fmt.Printf("error reading message, bailing: %s\n", err) 291 | 292 | return nil, err 293 | } else { 294 | fmt.Printf("error reading message, trying again: %s\n", err) 295 | retry = true 296 | continue 297 | } 298 | } 299 | 300 | //log.Event(ctx, "cNode SentMessage", ms.cNode .self, ms.p, pmes) 301 | 302 | if ms.singleMes > streamReuseTries { 303 | go inet.FullClose(ms.s) 304 | ms.s = nil 305 | } else if retry { 306 | ms.singleMes++ 307 | } 308 | 309 | return mes, nil 310 | } 311 | } 312 | 313 | func (ms *messageSender) ctxReadMsg(ctx context.Context, mes *pb.Message) error { 314 | // fmt.Printf("ctxReadMsg\n") 315 | errc := make(chan error, 1) 316 | go func(r ggio.ReadCloser) { 317 | errc <- r.ReadMsg(mes) 318 | }(ms.r) 319 | 320 | t := time.NewTimer(cNodeReadMessageTimeout) 321 | defer t.Stop() 322 | 323 | select { 324 | case err := <-errc: 325 | return err 326 | case <-ctx.Done(): 327 | return ctx.Err() 328 | case <-t.C: 329 | return ErrReadTimeout 330 | } 331 | } 332 | -------------------------------------------------------------------------------- /notif.go: -------------------------------------------------------------------------------- 1 | package coral 2 | 3 | import ( 4 | inet "github.com/libp2p/go-libp2p-net" 5 | ma "github.com/multiformats/go-multiaddr" 6 | mstream "github.com/multiformats/go-multistream" 7 | ) 8 | 9 | // netNotifiee defines methods to be used with the IpfscoralNode 10 | type netNotifiee coralNode 11 | 12 | func (nn *netNotifiee) coralNode() *coralNode { 13 | return (*coralNode)(nn) 14 | } 15 | 16 | func (nn *netNotifiee) Connected(n inet.Network, v inet.Conn) { 17 | coralNode := nn.coralNode() 18 | 19 | p := v.RemotePeer() 20 | protos, err := coralNode.peerstore.SupportsProtocols(p, coralNode.protocolStrs()...) 
21 | if err == nil && len(protos) != 0 { 22 | 23 | coralNode.plk.Lock() 24 | defer coralNode.plk.Unlock() 25 | if coralNode.host.Network().Connectedness(p) == inet.Connected { 26 | coralNode.Update(coralNode.ctx, p) 27 | 28 | } 29 | return 30 | } 31 | // Note: Unfortunately, the peerstore may not yet know that this peer is 32 | // a coralNode server. So, if it didn't return a positive response above, test 33 | // manually. 34 | go nn.testConnection(v) 35 | } 36 | 37 | func (nn *netNotifiee) testConnection(v inet.Conn) { 38 | coralNode := nn.coralNode() 39 | p := v.RemotePeer() 40 | 41 | // Forcibly use *this* connection. Otherwise, if we have two connections, we could: 42 | // 1. Test it twice. 43 | // 2. Have it closed from under us leaving the second (open) connection untested. 44 | s, err := v.NewStream() 45 | if err != nil { 46 | // Connection error 47 | return 48 | } 49 | defer inet.FullClose(s) 50 | 51 | selected, err := mstream.SelectOneOf(coralNode.protocolStrs(), s) 52 | if err != nil { 53 | // Doesn't support the protocol 54 | return 55 | } 56 | // Remember this choice (makes subsequent negotiations faster) 57 | coralNode.peerstore.AddProtocols(p, selected) 58 | 59 | // We lock here as we race with disconnect. If we didn't lock, we could 60 | // finish processing a connect after handling the associated disconnect 61 | // event and add the peer to the routing table after removing it. 62 | coralNode.plk.Lock() 63 | defer coralNode.plk.Unlock() 64 | if coralNode.host.Network().Connectedness(p) == inet.Connected { 65 | 66 | coralNode.Update(coralNode.ctx, p) 67 | } 68 | } 69 | 70 | func (nn *netNotifiee) Disconnected(n inet.Network, v inet.Conn) { 71 | coralNode := nn.coralNode() 72 | select { 73 | case <-coralNode.proc.Closing(): 74 | return 75 | default: 76 | } 77 | 78 | p := v.RemotePeer() 79 | 80 | // Lock and check to see if we're still connected. We lock to make sure 81 | // we don't concurrently process a connect event. 82 | coralNode.plk.Lock() 83 | defer coralNode.plk.Unlock() 84 | if coralNode.host.Network().Connectedness(p) == inet.Connected { 85 | // We're still connected. 86 | return 87 | } 88 | 89 | coralNode.levelTwo.routingTable.Remove(p) 90 | 91 | coralNode.smlk.Lock() 92 | defer coralNode.smlk.Unlock() 93 | ms, ok := coralNode.strmap[p] 94 | if !ok { 95 | return 96 | } 97 | delete(coralNode.strmap, p) 98 | 99 | // Do this asynchronously as ms.lk can block for a while. 100 | go func() { 101 | ms.lk.Lock() 102 | defer ms.lk.Unlock() 103 | ms.invalidate() 104 | }() 105 | } 106 | func (nn *netNotifiee) OpenedStream(n inet.Network, v inet.Stream) { 107 | } 108 | func (nn *netNotifiee) ClosedStream(n inet.Network, v inet.Stream) { 109 | } 110 | func (nn *netNotifiee) Listen(n inet.Network, a ma.Multiaddr) {} 111 | func (nn *netNotifiee) ListenClose(n inet.Network, a ma.Multiaddr) { 112 | 113 | } 114 | -------------------------------------------------------------------------------- /query.go: -------------------------------------------------------------------------------- 1 | // package query implements a query manager to drive concurrent workers 2 | // to query the DHT. A query is set up with a target key, a queryFunc tasked 3 | // to communicate with a peer, and a set of initial peers. As the query 4 | // progresses, queryFunc can return closer peers that will be used to navigate 5 | // closer to the target key in the DHT until an answer is reached.
6 | package coral 7 | 8 | import ( 9 | "context" 10 | "sync" 11 | 12 | u "github.com/ipfs/go-ipfs-util" 13 | //logging "github.com/ipfs/go-//log" 14 | todoctr "github.com/ipfs/go-todocounter" 15 | process "github.com/jbenet/goprocess" 16 | ctxproc "github.com/jbenet/goprocess/context" 17 | inet "github.com/libp2p/go-libp2p-net" 18 | peer "github.com/libp2p/go-libp2p-peer" 19 | pset "github.com/libp2p/go-libp2p-peer/peerset" 20 | pstore "github.com/libp2p/go-libp2p-peerstore" 21 | queue "github.com/libp2p/go-libp2p-peerstore/queue" 22 | routing "github.com/libp2p/go-libp2p-routing" 23 | notif "github.com/libp2p/go-libp2p-routing/notifications" 24 | ) 25 | 26 | var maxQueryConcurrency = 4 27 | 28 | type dhtQuery struct { 29 | dht *coralNode 30 | key string // the key we're querying for 31 | qfunc queryFunc // the function to execute per peer 32 | concurrency int // the concurrency parameter 33 | } 34 | 35 | type dhtQueryResult struct { 36 | value []byte // GetValue 37 | peer *pstore.PeerInfo // FindPeer 38 | providerPeers []pstore.PeerInfo // GetProviders 39 | closerPeers []*pstore.PeerInfo // * 40 | success bool 41 | 42 | finalSet *pset.PeerSet 43 | queriedSet *pset.PeerSet 44 | } 45 | 46 | // constructs query 47 | func (dht *coralNode) newQuery(k string, f queryFunc) *dhtQuery { 48 | return &dhtQuery{ 49 | key: k, 50 | dht: dht, 51 | qfunc: f, 52 | concurrency: maxQueryConcurrency, 53 | } 54 | } 55 | 56 | // QueryFunc is a function that runs a particular query with a given peer. 57 | // It returns either: 58 | // - the value 59 | // - a list of peers potentially better able to serve the query 60 | // - an error 61 | type queryFunc func(context.Context, peer.ID) (*dhtQueryResult, error) 62 | 63 | // Run runs the query at hand. pass in a list of peers to use first. 64 | func (q *dhtQuery) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) { 65 | select { 66 | case <-ctx.Done(): 67 | return nil, ctx.Err() 68 | default: 69 | } 70 | 71 | ctx, cancel := context.WithCancel(ctx) 72 | defer cancel() 73 | 74 | runner := newQueryRunner(q) 75 | return runner.Run(ctx, peers) 76 | } 77 | 78 | type dhtQueryRunner struct { 79 | query *dhtQuery // query to run 80 | peersSeen *pset.PeerSet // all peers queried. prevent querying same peer 2x 81 | peersQueried *pset.PeerSet // peers successfully connected to and queried 82 | peersToQuery *queue.ChanQueue // peers remaining to be queried 83 | peersRemaining todoctr.Counter // peersToQuery + currently processing 84 | 85 | result *dhtQueryResult // query result 86 | errs u.MultiErr // result errors. 
maybe should be a map[peer.ID]error 87 | 88 | rateLimit chan struct{} // processing semaphore 89 | //log //logging.Event//logger 90 | 91 | runCtx context.Context 92 | 93 | proc process.Process 94 | sync.RWMutex 95 | } 96 | 97 | func newQueryRunner(q *dhtQuery) *dhtQueryRunner { 98 | proc := process.WithParent(process.Background()) 99 | ctx := ctxproc.OnClosingContext(proc) 100 | return &dhtQueryRunner{ 101 | query: q, 102 | peersToQuery: queue.NewChanQueue(ctx, queue.NewXORDistancePQ(string(q.key))), 103 | peersRemaining: todoctr.NewSyncCounter(), 104 | peersSeen: pset.New(), 105 | peersQueried: pset.New(), 106 | rateLimit: make(chan struct{}, q.concurrency), 107 | proc: proc, 108 | } 109 | } 110 | 111 | func (r *dhtQueryRunner) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) { 112 | // r.//log = //log 113 | r.runCtx = ctx 114 | 115 | if len(peers) == 0 { 116 | //log.Warning("Running query with no peers!") 117 | return nil, nil 118 | } 119 | 120 | // setup concurrency rate limiting 121 | for i := 0; i < r.query.concurrency; i++ { 122 | r.rateLimit <- struct{}{} 123 | } 124 | 125 | // add all the peers we got first. 126 | for _, p := range peers { 127 | r.addPeerToQuery(p) 128 | } 129 | 130 | // go do this thing. 131 | // do it as a child proc to make sure Run exits 132 | // ONLY AFTER spawn workers has exited. 133 | r.proc.Go(r.spawnWorkers) 134 | 135 | // so workers are working. 136 | 137 | // wait until they're done. 138 | err := routing.ErrNotFound 139 | 140 | // now, if the context finishes, close the proc. 141 | // we have to do it here because the //logic before is setup, which 142 | // should run without closing the proc. 143 | ctxproc.CloseAfterContext(r.proc, ctx) 144 | 145 | select { 146 | case <-r.peersRemaining.Done(): 147 | r.proc.Close() 148 | r.RLock() 149 | defer r.RUnlock() 150 | 151 | err = routing.ErrNotFound 152 | 153 | // if every query to every peer failed, something must be very wrong. 154 | if len(r.errs) > 0 && len(r.errs) == r.peersSeen.Size() { 155 | //log.Debugf("query errs: %s", r.errs) 156 | err = r.errs[0] 157 | } 158 | 159 | case <-r.proc.Closed(): 160 | r.RLock() 161 | defer r.RUnlock() 162 | err = context.DeadlineExceeded 163 | } 164 | 165 | if r.result != nil && r.result.success { 166 | return r.result, nil 167 | } 168 | 169 | return &dhtQueryResult{ 170 | finalSet: r.peersSeen, 171 | queriedSet: r.peersQueried, 172 | }, err 173 | } 174 | 175 | func (r *dhtQueryRunner) addPeerToQuery(next peer.ID) { 176 | // if new peer is ourselves... 177 | if next == r.query.dht.id { 178 | // r.//log.Debug("addPeerToQuery skip self") 179 | return 180 | } 181 | 182 | if !r.peersSeen.TryAdd(next) { 183 | return 184 | } 185 | 186 | notif.PublishQueryEvent(r.runCtx, ¬if.QueryEvent{ 187 | Type: notif.AddingPeer, 188 | ID: next, 189 | }) 190 | 191 | r.peersRemaining.Increment(1) 192 | select { 193 | case r.peersToQuery.EnqChan <- next: 194 | case <-r.proc.Closing(): 195 | } 196 | } 197 | 198 | func (r *dhtQueryRunner) spawnWorkers(proc process.Process) { 199 | for { 200 | 201 | select { 202 | case <-r.peersRemaining.Done(): 203 | return 204 | 205 | case <-r.proc.Closing(): 206 | return 207 | 208 | case <-r.rateLimit: 209 | select { 210 | case p, more := <-r.peersToQuery.DeqChan: 211 | if !more { 212 | return // channel closed. 213 | } 214 | 215 | // do it as a child func to make sure Run exits 216 | // ONLY AFTER spawn workers has exited. 
217 | proc.Go(func(proc process.Process) { 218 | r.queryPeer(proc, p) 219 | }) 220 | case <-r.proc.Closing(): 221 | return 222 | case <-r.peersRemaining.Done(): 223 | return 224 | } 225 | } 226 | } 227 | } 228 | 229 | func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) { 230 | // ok let's do this! 231 | 232 | // create a context from our proc. 233 | ctx := ctxproc.OnClosingContext(proc) 234 | 235 | // make sure we do this when we exit 236 | defer func() { 237 | // signal we're done processing peer p 238 | r.peersRemaining.Decrement(1) 239 | r.rateLimit <- struct{}{} 240 | }() 241 | 242 | // make sure we're connected to the peer. 243 | // FIXME abstract away into the network layer 244 | // Note: Failure to connect in this block will cause the function to 245 | // short circuit. 246 | if r.query.dht.host.Network().Connectedness(p) == inet.NotConnected { 247 | //log.Debug("not connected. dialing.") 248 | 249 | notif.PublishQueryEvent(r.runCtx, ¬if.QueryEvent{ 250 | Type: notif.DialingPeer, 251 | ID: p, 252 | }) 253 | // while we dial, we do not take up a rate limit. this is to allow 254 | // forward progress during potentially very high latency dials. 255 | r.rateLimit <- struct{}{} 256 | 257 | pi := pstore.PeerInfo{ID: p} 258 | 259 | if err := r.query.dht.host.Connect(ctx, pi); err != nil { 260 | //log.Debugf("Error connecting: %s", err) 261 | 262 | notif.PublishQueryEvent(r.runCtx, ¬if.QueryEvent{ 263 | Type: notif.QueryError, 264 | Extra: err.Error(), 265 | ID: p, 266 | }) 267 | 268 | r.Lock() 269 | r.errs = append(r.errs, err) 270 | r.Unlock() 271 | <-r.rateLimit // need to grab it again, as we deferred. 272 | return 273 | } 274 | <-r.rateLimit // need to grab it again, as we deferred. 275 | //log.Debugf("connected. dial success.") 276 | } 277 | 278 | // finally, run the query against this peer 279 | res, err := r.query.qfunc(ctx, p) 280 | 281 | r.peersQueried.Add(p) 282 | 283 | if err != nil { 284 | //log.Debugf("ERROR worker for: %v %v", p, err) 285 | r.Lock() 286 | r.errs = append(r.errs, err) 287 | r.Unlock() 288 | 289 | } else if res.success { 290 | //log.Debugf("SUCCESS worker for: %v %s", p, res) 291 | r.Lock() 292 | r.result = res 293 | r.Unlock() 294 | go r.proc.Close() // signal to everyone that we're done. 295 | // must be async, as we're one of the children, and Close blocks. 296 | 297 | } else if len(res.closerPeers) > 0 { 298 | //log.Debugf("PEERS CLOSER -- worker for: %v (%d closer peers)", p, len(res.closerPeers)) 299 | for _, next := range res.closerPeers { 300 | if next.ID == r.query.dht.id { // don't add self. 301 | //log.Debugf("PEERS CLOSER -- worker for: %v found self", p) 302 | continue 303 | } 304 | 305 | // add their addresses to the dialer's peerstore 306 | r.query.dht.peerstore.AddAddrs(next.ID, next.Addrs, pstore.TempAddrTTL) 307 | r.addPeerToQuery(next.ID) 308 | //log.Debugf("PEERS CLOSER -- worker for: %v added %v (%v)", p, next.ID, next.Addrs) 309 | } 310 | } else { 311 | //log.Debugf("QUERY worker for: %v - not found, and no closer peers.", p) 312 | } 313 | } 314 | --------------------------------------------------------------------------------
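Usage note (not part of the repository): cluster_test.go above exercises the public surface of coralNode through TestValueGetSet. As a compact illustration of the same flow, a hypothetical test along the following lines could sit next to it; it reuses setupDHTS, connect, the "/v" namespaced validator, and the PutValue/GetValue channel API exactly as defined above, while the name TestPutGetSketch and the logging-only handling of the result are additions for illustration.

func TestPutGetSketch(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Spin up three nodes and connect them in a line: 0 <-> 1 <-> 2.
	_, _, nodes := setupDHTS(ctx, 3, t)
	defer func() {
		for _, n := range nodes {
			n.Close()
			n.host.Close()
		}
	}()
	connect(t, ctx, nodes[0], nodes[1])
	connect(t, ctx, nodes[1], nodes[2])

	// Store a value under the "v" validator namespace registered in setupDHT.
	if err := nodes[0].PutValue(ctx, "/v/hello", []byte("world")); err != nil {
		t.Fatal(err)
	}

	// GetValue streams results over channels; like TestValueGetSet, this only
	// logs the outcome rather than asserting, since the routing logic is still
	// in flux.
	values, errs := nodes[2].GetValue(ctx, "/v/hello")
	select {
	case v := <-values:
		t.Logf("got value: %s", v)
	case err := <-errs:
		t.Logf("lookup error: %s", err)
	case <-time.After(5 * time.Second):
		t.Log("no value within timeout")
	}
}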
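A second note, on the insert routine in coral.go: its loop follows Coral's hop schedule, starting from the full XOR distance between the local ID and the key and halving the remaining distance each iteration (until it reaches epsilon = 1), aiming each hop at an intermediate "fake" key at that distance. The standalone sketch below shows only that schedule with plain math/big arithmetic; the point helper and the XOR-based choice of intermediate target are illustrative stand-ins, not the kb.Dist / kb.CalculateMidpointKey helpers the repository relies on.

package main

import (
	"crypto/sha256"
	"fmt"
	"math/big"
)

// point hashes an identifier into the 256-bit keyspace (a stand-in for the
// ID/key conversion done by the kbucket helpers).
func point(s string) *big.Int {
	h := sha256.Sum256([]byte(s))
	return new(big.Int).SetBytes(h[:])
}

func main() {
	self := point("local-node-id")
	target := point("/v/hello")

	// Full XOR distance from the local node to the key.
	dist := new(big.Int).Xor(self, target)

	hops := 0
	for dist.Cmp(big.NewInt(1)) > 0 {
		if hops < 3 {
			// One way to realize the "fake key" idea: a point at the remaining
			// distance from the local node. The repo's CalculateMidpointKey may
			// derive it differently (its comment says it adds dist to the key).
			mid := new(big.Int).Xor(self, dist)
			fmt.Printf("hop %d: remaining distance %d bits, intermediate target %s...\n",
				hops, dist.BitLen(), fmt.Sprintf("%064x", mid)[:8])
		}
		dist.Div(dist, big.NewInt(2))
		hops++
	}
	fmt.Printf("distance reaches epsilon after %d halvings (~log2 of the keyspace)\n", hops)
}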