├── LICENSE ├── README.md ├── node.go └── node_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Stovepipe Studios, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dht 2 | 3 | `dht` is a [distributed hash table][wiki_dht] implementation that uses Consul 4 | and [rendezvous hashing][wiki_rendez] to distribute keys among distributed 5 | nodes. Because it uses rendezvous hashing to determine key placement, removing a 6 | node from the hash table is minimally disruptive in terms of key re-assignment. 7 | 8 | Local hash table state is refreshed in a background goroutine using blocking 9 | Consul API queries with the [default consistency mode][consul_api]. Errors 10 | encountered in the background goroutine are logged using the `log` package in 11 | the following format: 12 | 13 | [dht ] error: 14 | 15 | `dht` requires a locally-running Consul agent (version 0.5.2 or newer) with its 16 | HTTP API listening on `127.0.0.1:8500`. `dht` nodes run a simple HTTP server on 17 | an available ephemeral port to allow Consul to periodically check that the node 18 | is still alive. 19 | 20 | [wiki_dht]: https://en.wikipedia.org/wiki/Distributed_hash_table 21 | [wiki_rendez]: https://en.wikipedia.org/wiki/Rendezvous_hashing 22 | [consul_api]: https://www.consul.io/docs/agent/http.html 23 | 24 | ## Example 25 | 26 | ```go 27 | node1, err := dht.Join("worker", "worker-1") 28 | node2, err := dht.Join("worker", "worker-2") 29 | 30 | node1.Member("some_key") // true 31 | node2.Member("some_key") // false 32 | 33 | err = node1.Leave() 34 | err = node2.Leave() 35 | ``` 36 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package dht 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net" 7 | "net/http" 8 | "time" 9 | 10 | "github.com/hashicorp/consul/api" 11 | "github.com/stvp/rendezvous" 12 | ) 13 | 14 | const ( 15 | checkInterval = 5 * time.Second 16 | pollWait = time.Second 17 | ) 18 | 19 | func newCheckListenerAndServer() (listener net.Listener, server *http.Server, err error) { 20 | listener, err = net.Listen("tcp", "127.0.0.1:0") 21 | if err != nil { 22 | return listener, nil, err 23 | } 24 | 25 | server = &http.Server{ 26 | ReadTimeout: time.Second, 27 | WriteTimeout: time.Second, 28 | Handler: http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { 29 | fmt.Fprintf(resp, "OK") 30 | }), 31 | } 32 | 33 | // When the listener is closed, this goroutine returns. 34 | go server.Serve(listener) 35 | 36 | return listener, server, err 37 | } 38 | 39 | // Node is a single node in a distributed hash table, coordinated using 40 | // services registered in Consul. Key membership is determined using rendezvous 41 | // hashing to ensure even distribution of keys and minimal key membership 42 | // changes when a Node fails or otherwise leaves the hash table. 43 | // 44 | // Errors encountered when making blocking GET requests to the Consul agent API 45 | // are logged using the log package. 46 | type Node struct { 47 | // Consul 48 | serviceName string 49 | serviceID string 50 | consul *api.Client 51 | 52 | // HTTP health check server 53 | checkURL string 54 | checkListener net.Listener 55 | checkServer *http.Server 56 | 57 | // Hash table 58 | hashTable *rendezvous.Table 59 | waitIndex uint64 60 | 61 | // Graceful shutdown 62 | stop chan bool 63 | } 64 | 65 | // Join creates a new Node and adds it to the distributed hash table specified 66 | // by the given name. The given id should be unique among all Nodes in the hash 67 | // table. 68 | func Join(name, id string) (node *Node, err error) { 69 | node = &Node{ 70 | serviceName: name, 71 | serviceID: id, 72 | stop: make(chan bool), 73 | } 74 | 75 | node.consul, err = api.NewClient(api.DefaultConfig()) 76 | if err != nil { 77 | return nil, fmt.Errorf("dht: can't create Consul API client: %s", err) 78 | } 79 | 80 | node.checkListener, node.checkServer, err = newCheckListenerAndServer() 81 | if err != nil { 82 | return nil, fmt.Errorf("dht: can't start HTTP server: %s", err) 83 | } 84 | 85 | err = node.register() 86 | if err != nil { 87 | return nil, fmt.Errorf("dht: can't register %s service: %s", node.serviceName, err) 88 | } 89 | 90 | err = node.update() 91 | if err != nil { 92 | return nil, fmt.Errorf("dht: can't fetch %s services list: %s", node.serviceName, err) 93 | } 94 | 95 | go node.poll() 96 | 97 | return node, nil 98 | } 99 | 100 | func (n *Node) register() (err error) { 101 | err = n.consul.Agent().ServiceRegister(&api.AgentServiceRegistration{ 102 | Name: n.serviceName, 103 | ID: n.serviceID, 104 | Check: &api.AgentServiceCheck{ 105 | HTTP: fmt.Sprintf("http://%s", n.checkListener.Addr().String()), 106 | Interval: checkInterval.String(), 107 | }, 108 | }) 109 | return err 110 | } 111 | 112 | func (n *Node) poll() { 113 | var err error 114 | 115 | for { 116 | select { 117 | case <-n.stop: 118 | return 119 | case <-time.After(pollWait): 120 | err = n.update() 121 | if err != nil { 122 | log.Printf("[dht %s %s] error: %s", n.serviceName, n.serviceID, err) 123 | } 124 | } 125 | } 126 | } 127 | 128 | // update blocks until the service list changes or until the Consul agent's 129 | // timeout is reached (10 minutes by default). 130 | func (n *Node) update() (err error) { 131 | opts := &api.QueryOptions{WaitIndex: n.waitIndex} 132 | serviceEntries, meta, err := n.consul.Health().Service(n.serviceName, "", true, opts) 133 | if err != nil { 134 | return err 135 | } 136 | 137 | ids := make([]string, len(serviceEntries)) 138 | for i, entry := range serviceEntries { 139 | ids[i] = entry.Service.ID 140 | } 141 | 142 | n.hashTable = rendezvous.New(ids) 143 | n.waitIndex = meta.LastIndex 144 | 145 | return nil 146 | } 147 | 148 | // Member returns true if the given key belongs to this Node in the distributed 149 | // hash table. 150 | func (n *Node) Member(key string) bool { 151 | return n.hashTable.Get(key) == n.serviceID 152 | } 153 | 154 | // Leave removes the Node from the distributed hash table by de-registering it 155 | // from Consul. Once Leave is called, the Node should be discarded. An error is 156 | // returned if the Node is unable to successfully deregister itself from 157 | // Consul. In that case, Consul's health check for the Node will fail and 158 | // require manual cleanup. 159 | func (n *Node) Leave() (err error) { 160 | close(n.stop) // stop polling for state 161 | err = n.consul.Agent().ServiceDeregister(n.serviceID) 162 | n.checkListener.Close() // stop the health check http server 163 | return err 164 | } 165 | -------------------------------------------------------------------------------- /node_test.go: -------------------------------------------------------------------------------- 1 | package dht 2 | 3 | import ( 4 | "strconv" 5 | "testing" 6 | "time" 7 | 8 | "github.com/hashicorp/consul/api" 9 | "github.com/stvp/tempconsul" 10 | ) 11 | 12 | func apiClient() (client *api.Client) { 13 | client, _ = api.NewClient(api.DefaultConfig()) 14 | return client 15 | } 16 | 17 | func startConsul() (server *tempconsul.Server, err error) { 18 | server = &tempconsul.Server{} 19 | return server, server.Start() 20 | } 21 | 22 | func servicesCount(name string) (count int, err error) { 23 | services, _, err := apiClient().Catalog().Service(name, "", nil) 24 | return len(services), err 25 | } 26 | 27 | func TestJoinLeave(t *testing.T) { 28 | // No Consul agent 29 | _, err := Join("test", "a") 30 | if err == nil { 31 | t.Errorf("expected error got nil") 32 | } 33 | 34 | // Start Consul agent 35 | server, err := startConsul() 36 | if err != nil { 37 | t.Fatal(err) 38 | } 39 | defer server.Term() 40 | 41 | // Valid join 42 | node, err := Join("test", "a") 43 | if err != nil { 44 | t.Fatal(err) 45 | } 46 | 47 | count, err := servicesCount("test") 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | if count != 1 { 52 | t.Errorf("expected 1 service registered, got %d", count) 53 | } 54 | 55 | // Leave 56 | err = node.Leave() 57 | if err != nil { 58 | t.Error(err) 59 | } 60 | 61 | count, err = servicesCount("test") 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | if count != 0 { 66 | t.Errorf("expected 0 service registered, got %d", count) 67 | } 68 | } 69 | 70 | func TestMember(t *testing.T) { 71 | server, err := startConsul() 72 | if err != nil { 73 | t.Fatal(err) 74 | } 75 | defer server.Term() 76 | 77 | n := 3 78 | nodes := make([]*Node, n) 79 | for i := 0; i < n; i++ { 80 | nodes[i], err = Join("test", strconv.Itoa(i)) 81 | if err != nil { 82 | t.Fatal(err) 83 | } 84 | } 85 | 86 | // Ensure nodes have the latest state. First we have to wait for consul 87 | // to run all checks so that all services are "passing". 88 | time.Sleep(checkInterval * 2) 89 | for _, node := range nodes { 90 | node.waitIndex = 0 91 | node.update() 92 | } 93 | 94 | tests := []struct { 95 | key string 96 | member []bool 97 | }{ 98 | {"", []bool{true, false, false}}, 99 | {"a", []bool{true, false, false}}, 100 | {"b", []bool{false, false, true}}, 101 | {"d9edf13e917c4f0f66be0e80cc30060e", []bool{false, true, false}}, 102 | {"a2a9538886f1df96be9e5b52b14b404a", []bool{false, false, true}}, 103 | } 104 | 105 | for _, test := range tests { 106 | for i, node := range nodes { 107 | expect := test.member[i] 108 | got := node.Member(test.key) 109 | if got != expect { 110 | t.Errorf("nodes[%d].Member(%#v): expected %v, got %v", i, test.key, expect, got) 111 | } 112 | } 113 | } 114 | 115 | // Clean up 116 | for _, node := range nodes { 117 | err = node.Leave() 118 | if err != nil { 119 | t.Error(err) 120 | } 121 | } 122 | } 123 | --------------------------------------------------------------------------------