├── .circleci └── config.yml ├── .gitignore ├── LICENSE ├── README.md ├── _metcd ├── README.md ├── ctrl.go ├── ctrl_test.go ├── etcd_store.go ├── key_helpers.go ├── membership.go ├── metcdsrv │ ├── main.go │ ├── quickcluster.bash │ └── sanitycheck.bash ├── packet_transport.go └── server.go ├── connection.go ├── connection_maker.go ├── examples └── increment-only-counter │ ├── README.md │ ├── main.go │ ├── peer.go │ ├── peer_test.go │ ├── state.go │ └── state_test.go ├── go.mod ├── go.sum ├── gossip.go ├── gossip_channel.go ├── gossip_test.go ├── lint ├── local_peer.go ├── logger.go ├── meshconn ├── README.md ├── mesh_addr.go ├── peer.go └── pkt.go ├── mocks_test.go ├── overlay.go ├── peer.go ├── peer_name_hash.go ├── peer_name_hash_test.go ├── peer_name_mac.go ├── peer_name_mac_test.go ├── peer_test.go ├── peers.go ├── peers_test.go ├── protocol.go ├── protocol_crypto.go ├── protocol_crypto_test.go ├── protocol_test.go ├── router.go ├── routes.go ├── routes_test.go ├── status.go ├── surrogate_gossiper.go ├── surrogate_gossiper_test.go ├── token_bucket.go └── token_bucket_test.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | build: 5 | docker: 6 | - image: golang:1.13.1-stretch 7 | working_directory: /go/src/github.com/weaveworks/mesh 8 | steps: 9 | - checkout 10 | - run: 11 | name: Lint 12 | command: | 13 | ./lint 14 | - run: 15 | name: Test 16 | command: | 17 | go test -v 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | 
_testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | # Bad smells 27 | Makefile 28 | Dockerfile 29 | 30 | examples/increment-only-counter/increment-only-counter 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mesh [![GoDoc](https://godoc.org/github.com/weaveworks/mesh?status.svg)](https://godoc.org/github.com/weaveworks/mesh) [![Circle CI](https://circleci.com/gh/weaveworks/mesh.svg?style=svg)](https://circleci.com/gh/weaveworks/mesh) 2 | 3 | Mesh is a tool for building distributed applications. 4 | 5 | Mesh implements a [gossip protocol](https://en.wikipedia.org/wiki/Gossip_protocol) 6 | that provides membership, unicast, and broadcast functionality 7 | with [eventually-consistent semantics](https://en.wikipedia.org/wiki/Eventual_consistency). 8 | In CAP terms, it is AP: highly-available and partition-tolerant. 9 | 10 | Mesh works in a wide variety of network setups, including through NAT and firewalls, and across clouds and datacenters. 11 | It works in situations where there is only partial connectivity, 12 | i.e. data is transparently routed across multiple hops when there is no direct connection between peers.
13 | It copes with partitions and partial network failure. 14 | It can be easily bootstrapped, typically only requiring knowledge of a single existing peer in the mesh to join. 15 | It has built-in shared-secret authentication and encryption. 16 | It scales to on the order of 100 peers, and has no dependencies. 17 | 18 | ## Using 19 | 20 | Mesh is currently distributed as a Go package. 21 | See [the API documentation](https://godoc.org/github.com/weaveworks/mesh). 22 | 23 | We plan to offer Mesh as a standalone service + an easy-to-use API. 24 | We will support multiple deployment scenarios, including 25 | as a standalone binary, 26 | as a container, 27 | as an ambassador or [sidecar](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) component to an existing container, 28 | and as an infrastructure service in popular platforms. 29 | 30 | ## Developing 31 | 32 | Mesh builds with the standard Go tooling. You will need to put the 33 | repository in Go's expected directory structure; i.e., 34 | `$GOPATH/src/github.com/weaveworks/mesh`. 35 | 36 | ### Building 37 | 38 | If necessary, you may fetch the latest version of all of the dependencies into your GOPATH via 39 | 40 | `go get -d -u -t ./...` 41 | 42 | Build the code with the usual 43 | 44 | `go install ./...` 45 | 46 | ### Testing 47 | 48 | Assuming you've fetched dependencies as above, 49 | 50 | `go test ./...` 51 | 52 | ### Dependencies 53 | 54 | Mesh is a library, designed to be imported into a binary package. 55 | Vendoring is currently the best way for binary package authors to ensure reliable, reproducible builds. 56 | Therefore, we strongly recommend our users use vendoring for all of their dependencies, including Mesh. 57 | To avoid compatibility and availability issues, Mesh doesn't vendor its own dependencies, and doesn't recommend use of third-party import proxies. 
58 | 59 | There are several tools to make vendoring easier, including 60 | [gb](https://getgb.io), 61 | [gvt](https://github.com/filosottile/gvt), 62 | [glide](https://github.com/Masterminds/glide), and 63 | [govendor](https://github.com/kardianos/govendor). 64 | 65 | ### Workflow 66 | 67 | Mesh follows a typical PR workflow. 68 | All contributions should be made as pull requests that satisfy the guidelines, below. 69 | 70 | ### Guidelines 71 | 72 | - All code must abide by [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments) 73 | - Names should abide by [What's in a name](https://talks.golang.org/2014/names.slide#1) 74 | - Code must build on both Linux and Darwin, via plain `go build` 75 | - Code should have appropriate test coverage, invoked via plain `go test` 76 | 77 | In addition, several mechanical checks are enforced. 78 | See [the lint script](/lint) for details. 79 | 80 | ## Getting Help 81 | 82 | If you have any questions about, feedback for or problems with `mesh`: 83 | 84 | - Invite yourself to the Weave Users Slack. 85 | - Ask a question on the [#general](https://weave-community.slack.com/messages/general/) slack channel. 86 | - [File an issue](https://github.com/weaveworks/mesh/issues/new). 87 | 88 | Your feedback is always welcome! 89 | -------------------------------------------------------------------------------- /_metcd/README.md: -------------------------------------------------------------------------------- 1 | # metcd 2 | 3 | metcd implements the [etcd](https://github.com/coreos/etcd) 4 | [V3 API](https://github.com/coreos/etcd/blob/master/Documentation/rfc/v3api.md) 5 | on top of Weave Mesh. 6 | 7 | **Note** that this package no longer compiles due to changes in etcd upstream. 8 | The code remains for historical purposes. 9 | 10 | # Caveats 11 | 12 | - We only partially implement the etcd V3 API. See [etcd_store.go](https://github.com/weaveworks/mesh/blob/master/metcd/etcd_store.go) for details.
13 | - Snapshotting and compaction are not yet implemented. 14 | 15 | ## Usage 16 | 17 | ```go 18 | ln, err := net.Listen("tcp", ":8080") 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | minPeerCount := 3 24 | logger := log.New(os.Stderr, "", log.LstdFlags) 25 | server := metcd.NewDefaultServer(minPeerCount, logger) 26 | 27 | server.Serve(ln) 28 | ``` 29 | 30 | To have finer-grained control over the mesh, use [metcd.NewServer](http://godoc.org/github.com/weaveworks/mesh/metcd#NewServer). 31 | See [metcdsrv](https://github.com/weaveworks/mesh/tree/master/metcd/metcdsrv/main.go) for a complete example. 32 | -------------------------------------------------------------------------------- /_metcd/ctrl.go: -------------------------------------------------------------------------------- 1 | package metcd 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "net" 9 | "time" 10 | 11 | "github.com/coreos/etcd/raft" 12 | "github.com/coreos/etcd/raft/raftpb" 13 | "golang.org/x/net/context" 14 | 15 | "github.com/weaveworks/mesh" 16 | "github.com/weaveworks/mesh/meshconn" 17 | ) 18 | 19 | // +-------------+ +-----------------+ +-------------------------+ +-------+ 20 | // | mesh.Router | | packetTransport | | ctrl | | state | 21 | // | | | | | +-------------------+ | | | 22 | // | | | +----------+ | | | raft.Node | | | | 23 | // | | | | meshconn | | | | | | | | 24 | // | |======| ReadFrom|-----incomingc------->|Step Propose|<-----| API|<--- 25 | // | | | | WriteTo|<--------outgoingc----| | | | | 26 | // | | | +----------+ | | | | | | | 27 | // | | +-----------------+ | | | | | | 28 | // | | | | | | +-------+ 29 | // | | +------------+ +--------------+ | | | | ^ ^ 30 | // | |===| membership |->| configurator |---->|ProposeConfChange | | | | 31 | // +-------------+ +------------+ +--------------+ | | | | | | 32 | // ^ | +-------------------+ | | | 33 | // | | | | | | | 34 | // | +-------|---------|-------+ | | 35 | // H E R E | entryc snapshotc | | 36 | // B
E | | | | | 37 | // D R A G O N S | | '-------------' | 38 | // | v | 39 | // | ConfChange +---------+ Normal | 40 | // '-------------| demuxer |----------------------' 41 | // +---------+ 42 | 43 | type ctrl struct { 44 | self raft.Peer 45 | minPeerCount int 46 | incomingc <-chan raftpb.Message // from the transport 47 | outgoingc chan<- raftpb.Message // to the transport 48 | unreachablec <-chan uint64 // from the transport 49 | confchangec <-chan raftpb.ConfChange // from the mesh 50 | snapshotc chan<- raftpb.Snapshot // to the state machine 51 | entryc chan<- raftpb.Entry // to the demuxer 52 | proposalc <-chan []byte // from the state machine 53 | stopc chan struct{} // from stop() 54 | removedc chan<- struct{} // to calling context 55 | terminatedc chan struct{} 56 | storage *raft.MemoryStorage 57 | node raft.Node 58 | logger mesh.Logger 59 | } 60 | 61 | func newCtrl( 62 | self net.Addr, 63 | others []net.Addr, // to join existing cluster, pass nil or empty others 64 | minPeerCount int, 65 | incomingc <-chan raftpb.Message, 66 | outgoingc chan<- raftpb.Message, 67 | unreachablec <-chan uint64, 68 | confchangec <-chan raftpb.ConfChange, 69 | snapshotc chan<- raftpb.Snapshot, 70 | entryc chan<- raftpb.Entry, 71 | proposalc <-chan []byte, 72 | removedc chan<- struct{}, 73 | logger mesh.Logger, 74 | ) *ctrl { 75 | storage := raft.NewMemoryStorage() 76 | raftLogger := &raft.DefaultLogger{Logger: log.New(ioutil.Discard, "", 0)} 77 | raftLogger.EnableDebug() 78 | nodeConfig := &raft.Config{ 79 | ID: makeRaftPeer(self).ID, 80 | ElectionTick: 10, 81 | HeartbeatTick: 1, 82 | Storage: storage, 83 | Applied: 0, // starting fresh 84 | MaxSizePerMsg: 4096, // TODO(pb): looks like bytes; confirm that 85 | MaxInflightMsgs: 256, // TODO(pb): copied from docs; confirm that 86 | CheckQuorum: true, // leader steps down if quorum is not active for an electionTimeout 87 | Logger: raftLogger, 88 | } 89 | 90 | startPeers := makeRaftPeers(others) 91 | if len(startPeers) == 0 { 92 | 
startPeers = nil // special case: join existing 93 | } 94 | node := raft.StartNode(nodeConfig, startPeers) 95 | 96 | c := &ctrl{ 97 | self: makeRaftPeer(self), 98 | minPeerCount: minPeerCount, 99 | incomingc: incomingc, 100 | outgoingc: outgoingc, 101 | unreachablec: unreachablec, 102 | confchangec: confchangec, 103 | snapshotc: snapshotc, 104 | entryc: entryc, 105 | proposalc: proposalc, 106 | stopc: make(chan struct{}), 107 | removedc: removedc, 108 | terminatedc: make(chan struct{}), 109 | storage: storage, 110 | node: node, 111 | logger: logger, 112 | } 113 | go c.driveRaft() // analagous to raftexample serveChannels 114 | return c 115 | } 116 | 117 | // It is a programmer error to call stop more than once. 118 | func (c *ctrl) stop() { 119 | close(c.stopc) 120 | <-c.terminatedc 121 | } 122 | 123 | func (c *ctrl) driveRaft() { 124 | defer c.logger.Printf("ctrl: driveRaft loop exit") 125 | defer close(c.terminatedc) 126 | defer c.node.Stop() 127 | 128 | // We own driveProposals. We may terminate when the user invokes stop, or when 129 | // the Raft Node shuts down, which is generally when it receives a ConfChange 130 | // that removes it from the cluster. In either case, we kill driveProposals, 131 | // and wait for it to exit before returning. 132 | cancel := make(chan struct{}) 133 | done := make(chan struct{}) 134 | go func() { 135 | c.driveProposals(cancel) 136 | close(done) 137 | }() 138 | defer func() { <-done }() // order is important here 139 | defer close(cancel) // 140 | 141 | // Now that we are holding a raft.Node we have a few responsibilities. 
142 | // https://godoc.org/github.com/coreos/etcd/raft 143 | 144 | ticker := time.NewTicker(100 * time.Millisecond) // TODO(pb): taken from raftexample; need to validate 145 | defer ticker.Stop() 146 | 147 | for { 148 | select { 149 | case <-ticker.C: 150 | c.node.Tick() 151 | 152 | case r := <-c.node.Ready(): 153 | if err := c.handleReady(r); err != nil { 154 | c.logger.Printf("ctrl: handle ready: %v (aborting)", err) 155 | close(c.removedc) 156 | return 157 | } 158 | 159 | case msg := <-c.incomingc: 160 | c.node.Step(context.TODO(), msg) 161 | 162 | case id := <-c.unreachablec: 163 | c.node.ReportUnreachable(id) 164 | 165 | case <-c.stopc: 166 | c.logger.Printf("ctrl: got stop signal") 167 | return 168 | } 169 | } 170 | } 171 | 172 | func (c *ctrl) driveProposals(cancel <-chan struct{}) { 173 | defer c.logger.Printf("ctrl: driveProposals loop exit") 174 | 175 | // driveProposals is a separate goroutine from driveRaft, to mirror 176 | // contrib/raftexample. To be honest, it's not clear to me why that should be 177 | // required; it seems like we should be able to drive these channels in the 178 | // same for/select loop as the others. But we have strange errors (likely 179 | // deadlocks) if we structure it that way. 
180 | 181 | for c.proposalc != nil && c.confchangec != nil { 182 | select { 183 | case data, ok := <-c.proposalc: 184 | if !ok { 185 | c.logger.Printf("ctrl: got nil proposal; shutting down proposals") 186 | c.proposalc = nil 187 | continue 188 | } 189 | c.node.Propose(context.TODO(), data) 190 | 191 | case cc, ok := <-c.confchangec: 192 | if !ok { 193 | c.logger.Printf("ctrl: got nil conf change; shutting down conf changes") 194 | c.confchangec = nil 195 | continue 196 | } 197 | c.logger.Printf("ctrl: ProposeConfChange %s %x", cc.Type, cc.NodeID) 198 | c.node.ProposeConfChange(context.TODO(), cc) 199 | 200 | case <-cancel: 201 | return 202 | } 203 | } 204 | } 205 | 206 | func (c *ctrl) handleReady(r raft.Ready) error { 207 | // These steps may be performed in parallel, except as noted in step 2. 208 | // 209 | // 1. Write HardState, Entries, and Snapshot to persistent storage if they are 210 | // not empty. Note that when writing an Entry with Index i, any 211 | // previously-persisted entries with Index >= i must be discarded. 212 | if err := c.readySave(r.Snapshot, r.HardState, r.Entries); err != nil { 213 | return fmt.Errorf("save: %v", err) 214 | } 215 | 216 | // 2. Send all Messages to the nodes named in the To field. It is important 217 | // that no messages be sent until after the latest HardState has been persisted 218 | // to disk, and all Entries written by any previous Ready batch (Messages may 219 | // be sent while entries from the same batch are being persisted). If any 220 | // Message has type MsgSnap, call Node.ReportSnapshot() after it has been sent 221 | // (these messages may be large). Note: Marshalling messages is not 222 | // thread-safe; it is important that you make sure that no new entries are 223 | // persisted while marshalling. The easiest way to achieve this is to serialise 224 | // the messages directly inside your main raft loop. 225 | c.readySend(r.Messages) 226 | 227 | // 3. 
Apply Snapshot (if any) and CommittedEntries to the state machine. If any 228 | // committed Entry has Type EntryConfChange, call Node.ApplyConfChange() to 229 | // apply it to the node. The configuration change may be cancelled at this 230 | // point by setting the NodeID field to zero before calling ApplyConfChange 231 | // (but ApplyConfChange must be called one way or the other, and the decision 232 | // to cancel must be based solely on the state machine and not external 233 | // information such as the observed health of the node). 234 | if err := c.readyApply(r.Snapshot, r.CommittedEntries); err != nil { 235 | return fmt.Errorf("apply: %v", err) 236 | } 237 | 238 | // 4. Call Node.Advance() to signal readiness for the next batch of updates. 239 | // This may be done at any time after step 1, although all updates must be 240 | // processed in the order they were returned by Ready. 241 | c.readyAdvance() 242 | 243 | return nil 244 | } 245 | 246 | func (c *ctrl) readySave(snapshot raftpb.Snapshot, hardState raftpb.HardState, entries []raftpb.Entry) error { 247 | // For the moment, none of these steps persist to disk. That violates some Raft 248 | // invariants. But we are ephemeral, and will always boot empty, willingly 249 | // paying the snapshot cost. I trust that that the etcd Raft implementation 250 | // permits this. 
251 | if !raft.IsEmptySnap(snapshot) { 252 | if err := c.storage.ApplySnapshot(snapshot); err != nil { 253 | return fmt.Errorf("apply snapshot: %v", err) 254 | } 255 | } 256 | if !raft.IsEmptyHardState(hardState) { 257 | if err := c.storage.SetHardState(hardState); err != nil { 258 | return fmt.Errorf("set hard state: %v", err) 259 | } 260 | } 261 | if err := c.storage.Append(entries); err != nil { 262 | return fmt.Errorf("append: %v", err) 263 | } 264 | return nil 265 | } 266 | 267 | func (c *ctrl) readySend(msgs []raftpb.Message) { 268 | for _, msg := range msgs { 269 | // If this fails, the transport will tell us asynchronously via unreachablec. 270 | c.outgoingc <- msg 271 | 272 | if msg.Type == raftpb.MsgSnap { 273 | // Assume snapshot sends always succeed. 274 | // TODO(pb): do we need error reporting? 275 | c.node.ReportSnapshot(msg.To, raft.SnapshotFinish) 276 | } 277 | } 278 | } 279 | 280 | func (c *ctrl) readyApply(snapshot raftpb.Snapshot, committedEntries []raftpb.Entry) error { 281 | c.snapshotc <- snapshot 282 | 283 | for _, committedEntry := range committedEntries { 284 | c.entryc <- committedEntry 285 | 286 | if committedEntry.Type == raftpb.EntryConfChange { 287 | // See raftexample raftNode.publishEntries 288 | var cc raftpb.ConfChange 289 | if err := cc.Unmarshal(committedEntry.Data); err != nil { 290 | return fmt.Errorf("unmarshal ConfChange: %v", err) 291 | } 292 | c.node.ApplyConfChange(cc) 293 | if cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == c.self.ID { 294 | return errors.New("got ConfChange that removed me from the cluster; terminating") 295 | } 296 | } 297 | } 298 | 299 | return nil 300 | } 301 | 302 | func (c *ctrl) readyAdvance() { 303 | c.node.Advance() 304 | } 305 | 306 | // makeRaftPeer converts a net.Addr into a raft.Peer. 307 | // All peers must perform the Addr-to-Peer mapping in the same way. 
308 | // 309 | // The etcd Raft implementation tracks the committed entry for each node ID, 310 | // and panics if it discovers a node has lost previously committed entries. 311 | // In effect, it assumes commitment implies durability. But our storage is 312 | // explicitly non-durable. So, whenever a node restarts, we need to give it 313 | // a brand new ID. That is the peer UID. 314 | func makeRaftPeer(addr net.Addr) raft.Peer { 315 | return raft.Peer{ 316 | ID: uint64(addr.(meshconn.MeshAddr).PeerUID), 317 | Context: nil, // TODO(pb): ?? 318 | } 319 | } 320 | 321 | func makeRaftPeers(addrs []net.Addr) []raft.Peer { 322 | peers := make([]raft.Peer, len(addrs)) 323 | for i, addr := range addrs { 324 | peers[i] = makeRaftPeer(addr) 325 | } 326 | return peers 327 | } 328 | -------------------------------------------------------------------------------- /_metcd/ctrl_test.go: -------------------------------------------------------------------------------- 1 | package metcd 2 | 3 | import ( 4 | "log" 5 | "net" 6 | "os" 7 | "testing" 8 | "time" 9 | 10 | "github.com/coreos/etcd/raft/raftpb" 11 | 12 | "github.com/weaveworks/mesh" 13 | "github.com/weaveworks/mesh/meshconn" 14 | ) 15 | 16 | func TestCtrlTerminates(t *testing.T) { 17 | var ( 18 | peerName, _ = mesh.PeerNameFromString("01:23:45:67:89:01") 19 | self = meshconn.MeshAddr{PeerName: peerName, PeerUID: 123} 20 | others = []net.Addr{} 21 | minPeerCount = 5 22 | incomingc = make(chan raftpb.Message) 23 | outgoingc = make(chan raftpb.Message, 10000) 24 | unreachablec = make(chan uint64) 25 | confchangec = make(chan raftpb.ConfChange) 26 | snapshotc = make(chan raftpb.Snapshot, 10000) 27 | entryc = make(chan raftpb.Entry) 28 | proposalc = make(chan []byte) 29 | removedc = make(chan struct{}) 30 | logger = log.New(os.Stderr, "", log.LstdFlags) 31 | ) 32 | c := newCtrl( 33 | self, 34 | others, 35 | minPeerCount, 36 | incomingc, 37 | outgoingc, 38 | unreachablec, 39 | confchangec, 40 | snapshotc, 41 | entryc, 42 | 
proposalc, 43 | removedc, 44 | logger, 45 | ) 46 | stopped := make(chan struct{}) 47 | go func() { 48 | c.stop() 49 | close(stopped) 50 | }() 51 | select { 52 | case <-stopped: 53 | t.Log("ctrl terminated") 54 | case <-time.After(5 * time.Second): 55 | t.Fatal("ctrl didn't terminate") 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /_metcd/key_helpers.go: -------------------------------------------------------------------------------- 1 | package metcd 2 | 3 | // PrefixRangeEnd allows Get, Delete, and Watch requests to operate on all keys 4 | // with a matching prefix. Pass the prefix to this function, and use the result 5 | // as the RangeEnd value. 6 | func PrefixRangeEnd(prefix []byte) []byte { 7 | // https://github.com/coreos/etcd/blob/17e32b6/clientv3/op.go#L187 8 | end := make([]byte, len(prefix)) 9 | copy(end, prefix) 10 | for i := len(end) - 1; i >= 0; i-- { 11 | if end[i] < 0xff { 12 | end[i] = end[i] + 1 13 | end = end[:i+1] 14 | return end 15 | } 16 | } 17 | // next prefix does not exist (e.g., 0xffff); 18 | // default to WithFromKey policy 19 | return []byte{0} 20 | } 21 | -------------------------------------------------------------------------------- /_metcd/membership.go: -------------------------------------------------------------------------------- 1 | package metcd 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/coreos/etcd/raft/raftpb" 7 | 8 | "github.com/weaveworks/mesh" 9 | ) 10 | 11 | // membership regularly polls the mesh.Router for peers in the mesh. 12 | // New peer UIDs are sent on addc. Removed peer UIDs are sent on remc. 13 | // If the membership set gets smaller than minCount, membership will 14 | // close shrunkc and stop, and the caller should terminate. 
15 | type membership struct { 16 | router *mesh.Router 17 | minCount int 18 | addc chan<- uint64 // to configurator 19 | remc chan<- uint64 // to configurator 20 | shrunkc chan<- struct{} // to calling context 21 | quitc chan struct{} 22 | logger mesh.Logger 23 | } 24 | 25 | func newMembership(router *mesh.Router, initial uint64set, minCount int, addc, remc chan<- uint64, shrunkc chan<- struct{}, logger mesh.Logger) *membership { 26 | m := &membership{ 27 | router: router, 28 | minCount: minCount, 29 | addc: addc, 30 | remc: remc, 31 | shrunkc: shrunkc, 32 | quitc: make(chan struct{}), 33 | logger: logger, 34 | } 35 | go m.loop(initial) 36 | return m 37 | } 38 | 39 | func (m *membership) stop() { 40 | close(m.quitc) 41 | } 42 | 43 | func (m *membership) loop(members uint64set) { 44 | defer m.logger.Printf("membership: loop exit") 45 | 46 | ticker := time.NewTicker(time.Second) 47 | defer ticker.Stop() 48 | 49 | var add, rem uint64set 50 | 51 | for { 52 | select { 53 | case <-ticker.C: 54 | add, rem, members = diff(members, membershipSet(m.router)) 55 | if len(members) < m.minCount { 56 | m.logger.Printf("membership: member count (%d) shrunk beneath minimum (%d)", len(members), m.minCount) 57 | close(m.shrunkc) 58 | return 59 | } 60 | for id := range add { 61 | m.addc <- id 62 | } 63 | for id := range rem { 64 | m.remc <- id 65 | } 66 | case <-m.quitc: 67 | return 68 | } 69 | } 70 | } 71 | 72 | func membershipSet(router *mesh.Router) uint64set { 73 | descriptions := router.Peers.Descriptions() 74 | members := make(uint64set, len(descriptions)) 75 | for _, description := range descriptions { 76 | members.add(uint64(description.UID)) 77 | } 78 | return members 79 | } 80 | 81 | func diff(prev, curr uint64set) (add, rem, next uint64set) { 82 | add, rem, next = uint64set{}, uint64set{}, uint64set{} 83 | for i := range prev { 84 | prev.del(i) 85 | if curr.has(i) { // was in previous, still in current 86 | curr.del(i) // prevent it from being interpreted as new 87 | 
next.add(i) // promoted to next 88 | } else { // was in previous, no longer in current 89 | rem.add(i) // marked as removed 90 | } 91 | } 92 | for i := range curr { 93 | curr.del(i) 94 | add.add(i) 95 | next.add(i) 96 | } 97 | return add, rem, next 98 | } 99 | 100 | // configurator sits between the mesh membership subsystem and the raft.Node. 101 | // When the mesh tells us that a peer is removed, the configurator adds that 102 | // peer ID to a pending-remove set. Every tick, the configurator sends a 103 | // ConfChange Remove proposal to the raft.Node for each peer in the 104 | // pending-remove set. And when the configurator receives a committed ConfChange 105 | // Remove entry for the peer, it removes the peer from the pending-remove set. 106 | // 107 | // We do the same thing for the add flow, for symmetry. 108 | // 109 | // Why is this necessary? Well, due to what looks like a bug in the raft.Node, 110 | // ConfChange Remove proposals can get lost when the target node disappears. It 111 | // is especially acute when the killed node is the leader. The current (or new) 112 | // leader ends up spamming Heartbeats to the terminated node forever. So, 113 | // lacking any obvious way to track the state of individual proposals, I've 114 | // elected to continuously re-propose ConfChanges until they are confirmed i.e. 115 | // committed. 
116 | type configurator struct { 117 | addc <-chan uint64 // from membership 118 | remc <-chan uint64 // from membership 119 | confchangec chan<- raftpb.ConfChange // to raft.Node 120 | entryc <-chan raftpb.Entry // from raft.Node 121 | quitc chan struct{} 122 | logger mesh.Logger 123 | } 124 | 125 | func newConfigurator(addc, remc <-chan uint64, confchangec chan<- raftpb.ConfChange, entryc <-chan raftpb.Entry, logger mesh.Logger) *configurator { 126 | c := &configurator{ 127 | addc: addc, 128 | remc: remc, 129 | confchangec: confchangec, 130 | entryc: entryc, 131 | quitc: make(chan struct{}), 132 | logger: logger, 133 | } 134 | go c.loop() 135 | return c 136 | } 137 | 138 | func (c *configurator) stop() { 139 | close(c.quitc) 140 | } 141 | 142 | func (c *configurator) loop() { 143 | defer c.logger.Printf("configurator: loop exit") 144 | 145 | ticker := time.NewTicker(time.Second) 146 | defer ticker.Stop() 147 | 148 | var ( 149 | pendingAdd = uint64set{} 150 | pendingRem = uint64set{} 151 | ) 152 | 153 | for { 154 | select { 155 | case id := <-c.addc: 156 | if pendingAdd.has(id) { 157 | c.logger.Printf("configurator: recv add %x, was pending add already", id) 158 | } else { 159 | c.logger.Printf("configurator: recv add %x, now pending add", id) 160 | pendingAdd.add(id) 161 | // We *must* wait before emitting a ConfChange. 162 | // https://github.com/coreos/etcd/issues/4759 163 | } 164 | 165 | case id := <-c.remc: 166 | if pendingRem.has(id) { 167 | c.logger.Printf("configurator: recv rem %x, was pending rem already", id) 168 | } else { 169 | c.logger.Printf("configurator: recv rem %x, now pending rem", id) 170 | pendingRem.add(id) 171 | // We *must* wait before emitting a ConfChange. 
172 | // https://github.com/coreos/etcd/issues/4759 173 | } 174 | 175 | case <-ticker.C: 176 | for id := range pendingAdd { 177 | c.logger.Printf("configurator: send ConfChangeAddNode %x", id) 178 | c.confchangec <- raftpb.ConfChange{ 179 | Type: raftpb.ConfChangeAddNode, 180 | NodeID: id, 181 | } 182 | } 183 | for id := range pendingRem { 184 | c.logger.Printf("configurator: send ConfChangeRemoveNode %x", id) 185 | c.confchangec <- raftpb.ConfChange{ 186 | Type: raftpb.ConfChangeRemoveNode, 187 | NodeID: id, 188 | } 189 | } 190 | 191 | case entry := <-c.entryc: 192 | if entry.Type != raftpb.EntryConfChange { 193 | c.logger.Printf("configurator: ignoring %s", entry.Type) 194 | continue 195 | } 196 | var cc raftpb.ConfChange 197 | if err := cc.Unmarshal(entry.Data); err != nil { 198 | c.logger.Printf("configurator: got invalid ConfChange (%v); ignoring", err) 199 | continue 200 | } 201 | switch cc.Type { 202 | case raftpb.ConfChangeAddNode: 203 | if _, ok := pendingAdd[cc.NodeID]; ok { 204 | c.logger.Printf("configurator: recv %s %x: was pending add, deleting", cc.Type, cc.NodeID) 205 | delete(pendingAdd, cc.NodeID) 206 | } else { 207 | c.logger.Printf("configurator: recv %s %x: not pending add, ignoring", cc.Type, cc.NodeID) 208 | } 209 | case raftpb.ConfChangeRemoveNode: 210 | if _, ok := pendingRem[cc.NodeID]; ok { 211 | c.logger.Printf("configurator: recv %s %x: was pending rem, deleting", cc.Type, cc.NodeID) 212 | delete(pendingRem, cc.NodeID) 213 | } else { 214 | c.logger.Printf("configurator: recv %s %x: not pending rem, ignoring", cc.Type, cc.NodeID) 215 | } 216 | } 217 | 218 | case <-c.quitc: 219 | return 220 | } 221 | } 222 | } 223 | 224 | type uint64set map[uint64]struct{} 225 | 226 | func (s uint64set) add(i uint64) { s[i] = struct{}{} } 227 | func (s uint64set) has(i uint64) bool { _, ok := s[i]; return ok } 228 | func (s uint64set) del(i uint64) { delete(s, i) } 229 | -------------------------------------------------------------------------------- 
/_metcd/metcdsrv/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "net" 9 | "os" 10 | "os/signal" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | "syscall" 15 | "time" 16 | 17 | "github.com/weaveworks/mesh" 18 | "github.com/weaveworks/mesh/meshconn" 19 | "github.com/weaveworks/mesh/metcd" 20 | ) 21 | 22 | func main() { 23 | peers := &stringset{} 24 | var ( 25 | apiListen = flag.String("api", ":8080", "API listen address") 26 | meshListen = flag.String("mesh", net.JoinHostPort("0.0.0.0", strconv.Itoa(mesh.Port)), "mesh listen address") 27 | hwaddr = flag.String("hwaddr", mustHardwareAddr(), "MAC address, i.e. mesh peer name") 28 | nickname = flag.String("nickname", mustHostname(), "peer nickname") 29 | password = flag.String("password", "", "password (optional)") 30 | channel = flag.String("channel", "default", "gossip channel name") 31 | quicktest = flag.Int("quicktest", 0, "set to integer 1-9 to enable quick test setup of node") 32 | n = flag.Int("n", 3, "number of peers expected (lower bound)") 33 | ) 34 | flag.Var(peers, "peer", "initial peer (may be repeated)") 35 | flag.Parse() 36 | 37 | if *quicktest >= 1 && *quicktest <= 9 { 38 | *hwaddr = fmt.Sprintf("00:00:00:00:00:0%d", *quicktest) 39 | *meshListen = fmt.Sprintf("0.0.0.0:600%d", *quicktest) 40 | *apiListen = fmt.Sprintf("0.0.0.0:800%d", *quicktest) 41 | *nickname = fmt.Sprintf("%d", *quicktest) 42 | for i := 1; i <= 9; i++ { 43 | peers.Set(fmt.Sprintf("127.0.0.1:600%d", i)) 44 | } 45 | } 46 | 47 | logger := log.New(os.Stderr, *nickname+"> ", log.LstdFlags) 48 | 49 | host, portStr, err := net.SplitHostPort(*meshListen) 50 | if err != nil { 51 | logger.Fatalf("mesh address: %s: %v", *meshListen, err) 52 | } 53 | port, err := strconv.Atoi(portStr) 54 | if err != nil { 55 | logger.Fatalf("mesh address: %s: %v", *meshListen, err) 56 | } 57 | 58 | name, err := 
mesh.PeerNameFromString(*hwaddr) 59 | if err != nil { 60 | logger.Fatalf("%s: %v", *hwaddr, err) 61 | } 62 | 63 | ln, err := net.Listen("tcp", *apiListen) 64 | if err != nil { 65 | logger.Fatal(err) 66 | } 67 | 68 | logger.Printf("hello!") 69 | defer logger.Printf("goodbye!") 70 | 71 | // Create, but do not start, a router. 72 | meshLogger := log.New(ioutil.Discard, "", 0) // no log from mesh please 73 | router := mesh.NewRouter(mesh.Config{ 74 | Host: host, 75 | Port: port, 76 | ProtocolMinVersion: mesh.ProtocolMinVersion, 77 | Password: []byte(*password), 78 | ConnLimit: 64, 79 | PeerDiscovery: true, 80 | TrustedSubnets: []*net.IPNet{}, 81 | }, name, *nickname, mesh.NullOverlay{}, meshLogger) 82 | 83 | // Create a meshconn.Peer. 84 | peer := meshconn.NewPeer(name, router.Ourself.UID, logger) 85 | gossip := router.NewGossip(*channel, peer) 86 | peer.Register(gossip) 87 | 88 | // Start the router and join the mesh. 89 | func() { 90 | logger.Printf("mesh router starting (%s)", *meshListen) 91 | router.Start() 92 | }() 93 | defer func() { 94 | logger.Printf("mesh router stopping") 95 | router.Stop() 96 | }() 97 | 98 | router.ConnectionMaker.InitiateConnections(peers.slice(), true) 99 | 100 | terminatec := make(chan struct{}) 101 | terminatedc := make(chan error) 102 | go func() { 103 | c := make(chan os.Signal) 104 | signal.Notify(c, syscall.SIGINT, syscall.SIGTERM) 105 | sig := <-c // receive interrupt 106 | close(terminatec) // terminate metcd.Server 107 | <-terminatedc // wait for shutdown 108 | terminatedc <- fmt.Errorf("%s", sig) // forward signal 109 | }() 110 | go func() { 111 | metcdServer := metcd.NewServer(router, peer, *n, terminatec, terminatedc, logger) 112 | grpcServer := metcd.GRPCServer(metcdServer) 113 | defer grpcServer.Stop() 114 | logger.Printf("gRPC listening at %s", *apiListen) 115 | terminatedc <- grpcServer.Serve(ln) 116 | }() 117 | logger.Print(<-terminatedc) 118 | time.Sleep(time.Second) // TODO(pb): there must be a better way 119 | } 120 | 
121 | type stringset map[string]struct{} 122 | 123 | func (ss stringset) Set(value string) error { 124 | ss[value] = struct{}{} 125 | return nil 126 | } 127 | 128 | func (ss stringset) String() string { 129 | return strings.Join(ss.slice(), ",") 130 | } 131 | 132 | func (ss stringset) slice() []string { 133 | slice := make([]string, 0, len(ss)) 134 | for k := range ss { 135 | slice = append(slice, k) 136 | } 137 | sort.Strings(slice) 138 | return slice 139 | } 140 | 141 | func mustHardwareAddr() string { 142 | ifaces, err := net.Interfaces() 143 | if err != nil { 144 | panic(err) 145 | } 146 | for _, iface := range ifaces { 147 | if s := iface.HardwareAddr.String(); s != "" { 148 | return s 149 | } 150 | } 151 | panic("no valid network interfaces") 152 | } 153 | 154 | func mustHostname() string { 155 | hostname, err := os.Hostname() 156 | if err != nil { 157 | panic(err) 158 | } 159 | return hostname 160 | } 161 | -------------------------------------------------------------------------------- /_metcd/metcdsrv/quickcluster.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | # Kill child processes at exit 8 | trap "pkill -P $$" SIGINT SIGTERM EXIT 9 | 10 | go install github.com/weaveworks/mesh/metcd/metcdsrv 11 | 12 | metcdsrv -quicktest=1 & 13 | metcdsrv -quicktest=2 & 14 | metcdsrv -quicktest=3 & 15 | 16 | read x 17 | 18 | -------------------------------------------------------------------------------- /_metcd/metcdsrv/sanitycheck.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is just a sanity check for metcdsrv. 
4 | 5 | set -o errexit 6 | set -o nounset 7 | set -o pipefail 8 | 9 | # Kill child processes at exit 10 | trap "pkill -P $$" SIGINT SIGTERM EXIT 11 | 12 | echo Installing metcdsrv 13 | go install github.com/weaveworks/mesh/metcd/metcdsrv 14 | 15 | echo Booting cluster 16 | # Remove output redirection to debug 17 | metcdsrv -quicktest=1 >/dev/null 2>&1 & 18 | metcdsrv -quicktest=2 >/dev/null 2>&1 & 19 | metcdsrv -quicktest=3 >/dev/null 2>&1 & 20 | 21 | echo Waiting for cluster to settle 22 | # Wait for the cluster to settle 23 | sleep 5 24 | 25 | echo Installing etcdctl 26 | go install github.com/coreos/etcd/cmd/etcdctl 27 | function etcdctl { env ETCDCTL_API=3 etcdctl --endpoints=127.0.0.1:8001,127.0.0.1:8002,127.0.0.1:8003 $*; } 28 | 29 | echo Testing first put 30 | etcdctl put foo bar 31 | have=$(etcdctl get foo | tail -n1) 32 | want="bar" 33 | if [[ $want != $have ]] 34 | then 35 | echo foo: want $want, have $have 36 | exit 1 37 | fi 38 | 39 | echo Testing second put 40 | etcdctl put foo baz 41 | have=$(etcdctl get foo | tail -n1) 42 | want="baz" 43 | if [[ $want != $have ]] 44 | then 45 | echo foo: want $want, have $have 46 | exit 1 47 | fi 48 | -------------------------------------------------------------------------------- /_metcd/packet_transport.go: -------------------------------------------------------------------------------- 1 | package metcd 2 | 3 | import ( 4 | "net" 5 | 6 | "github.com/coreos/etcd/raft/raftpb" 7 | 8 | "github.com/weaveworks/mesh" 9 | "github.com/weaveworks/mesh/meshconn" 10 | ) 11 | 12 | // packetTransport takes ownership of the net.PacketConn. 13 | // Incoming messages are unmarshaled from the conn and send to incomingc. 14 | // Outgoing messages are received from outgoingc and marshaled to the conn. 
15 | type packetTransport struct { 16 | conn net.PacketConn 17 | translate peerTranslator 18 | incomingc chan<- raftpb.Message // to controller 19 | outgoingc <-chan raftpb.Message // from controller 20 | unreachablec chan<- uint64 // to controller 21 | logger mesh.Logger 22 | } 23 | 24 | func newPacketTransport( 25 | conn net.PacketConn, 26 | translate peerTranslator, 27 | incomingc chan<- raftpb.Message, 28 | outgoingc <-chan raftpb.Message, 29 | unreachablec chan<- uint64, 30 | logger mesh.Logger, 31 | ) *packetTransport { 32 | t := &packetTransport{ 33 | conn: conn, 34 | translate: translate, 35 | incomingc: incomingc, 36 | outgoingc: outgoingc, 37 | unreachablec: unreachablec, 38 | logger: logger, 39 | } 40 | go t.recvLoop() 41 | go t.sendLoop() 42 | return t 43 | } 44 | 45 | type peerTranslator func(uid mesh.PeerUID) (mesh.PeerName, error) 46 | 47 | func (t *packetTransport) stop() { 48 | t.conn.Close() 49 | } 50 | 51 | func (t *packetTransport) recvLoop() { 52 | defer t.logger.Printf("packet transport: recv loop exit") 53 | const maxRecvLen = 8192 54 | b := make([]byte, maxRecvLen) 55 | for { 56 | n, remote, err := t.conn.ReadFrom(b) 57 | if err != nil { 58 | t.logger.Printf("packet transport: recv: %v (aborting)", err) 59 | return 60 | } else if n >= cap(b) { 61 | t.logger.Printf("packet transport: recv from %s: short read, %d >= %d (continuing)", remote, n, cap(b)) 62 | continue 63 | } 64 | var msg raftpb.Message 65 | if err := msg.Unmarshal(b[:n]); err != nil { 66 | t.logger.Printf("packet transport: recv from %s (sz %d): %v (%s) (continuing)", remote, n, err, b[:n]) 67 | continue 68 | } 69 | //t.logger.Printf("packet transport: recv from %s (sz %d/%d) OK", remote, n, msg.Size()) 70 | t.incomingc <- msg 71 | } 72 | } 73 | 74 | func (t *packetTransport) sendLoop() { 75 | defer t.logger.Printf("packet transport: send loop exit") 76 | for msg := range t.outgoingc { 77 | b, err := msg.Marshal() 78 | if err != nil { 79 | t.logger.Printf("packet transport: send 
to Raft ID %x: %v (continuing)", msg.To, err) 80 | continue 81 | } 82 | peerName, err := t.translate(mesh.PeerUID(msg.To)) 83 | if err != nil { 84 | select { 85 | case t.unreachablec <- msg.To: 86 | t.logger.Printf("packet transport: send to Raft ID %x: %v (unreachable; continuing) (%s)", msg.To, err, msg.Type) 87 | default: 88 | t.logger.Printf("packet transport: send to Raft ID %x: %v (unreachable, report dropped; continuing) (%s)", msg.To, err, msg.Type) 89 | } 90 | continue 91 | } 92 | dst := meshconn.MeshAddr{PeerName: peerName} 93 | if n, err := t.conn.WriteTo(b, dst); err != nil { 94 | t.logger.Printf("packet transport: send to Mesh peer %s: %v (continuing)", dst, err) 95 | continue 96 | } else if n < len(b) { 97 | t.logger.Printf("packet transport: send to Mesh peer %s: short write, %d < %d (continuing)", dst, n, len(b)) 98 | continue 99 | } 100 | //t.logger.Printf("packet transport: send to %s (sz %d/%d) OK", dst, msg.Size(), len(b)) 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /_metcd/server.go: -------------------------------------------------------------------------------- 1 | package metcd 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "os" 7 | "time" 8 | 9 | "github.com/coreos/etcd/etcdserver/etcdserverpb" 10 | "github.com/coreos/etcd/raft/raftpb" 11 | "google.golang.org/grpc" 12 | 13 | "github.com/weaveworks/mesh" 14 | "github.com/weaveworks/mesh/meshconn" 15 | ) 16 | 17 | // Server collects the etcd V3 server interfaces that we implement. 18 | type Server interface { 19 | //etcdserverpb.AuthServer 20 | //etcdserverpb.ClusterServer 21 | etcdserverpb.KVServer 22 | //etcdserverpb.LeaseServer 23 | //etcdserverpb.MaintenanceServer 24 | //etcdserverpb.WatchServer 25 | } 26 | 27 | // GRPCServer converts a metcd.Server to a *grpc.Server. 28 | func GRPCServer(s Server, options ...grpc.ServerOption) *grpc.Server { 29 | srv := grpc.NewServer(options...) 
30 | //etcdserverpb.RegisterAuthServer(srv, s) 31 | //etcdserverpb.RegisterClusterServer(srv, s) 32 | etcdserverpb.RegisterKVServer(srv, s) 33 | //etcdserverpb.RegisterLeaseServer(srv, s) 34 | //etcdserverpb.RegisterMaintenanceServer(srv, s) 35 | //etcdserverpb.RegisterWatchServer(srv, s) 36 | return srv 37 | } 38 | 39 | // NewServer returns a Server that (partially) implements the etcd V3 API. 40 | // It uses the passed mesh components to act as the Raft transport. 41 | // For the moment, it blocks until the mesh has minPeerCount peers. 42 | // (This responsibility should rather be given to the caller.) 43 | // The server can be terminated by certain conditions in the cluster. 44 | // If that happens, terminatedc signaled, and the server is invalid. 45 | func NewServer( 46 | router *mesh.Router, 47 | peer *meshconn.Peer, 48 | minPeerCount int, 49 | terminatec <-chan struct{}, 50 | terminatedc chan<- error, 51 | logger mesh.Logger, 52 | ) Server { 53 | c := make(chan Server) 54 | go serverManager(router, peer, minPeerCount, terminatec, terminatedc, logger, c) 55 | return <-c 56 | } 57 | 58 | // NewDefaultServer is like NewServer, but we take care of creating a 59 | // mesh.Router and meshconn.Peer for you, with sane defaults. If you need more 60 | // fine-grained control, create the components yourself and use NewServer. 
61 | func NewDefaultServer( 62 | minPeerCount int, 63 | terminatec <-chan struct{}, 64 | terminatedc chan<- error, 65 | logger mesh.Logger, 66 | ) Server { 67 | var ( 68 | peerName = mustPeerName() 69 | nickName = mustHostname() 70 | host = "0.0.0.0" 71 | port = 6379 72 | password = "" 73 | channel = "metcd" 74 | ) 75 | router := mesh.NewRouter(mesh.Config{ 76 | Host: host, 77 | Port: port, 78 | ProtocolMinVersion: mesh.ProtocolMinVersion, 79 | Password: []byte(password), 80 | ConnLimit: 64, 81 | PeerDiscovery: true, 82 | TrustedSubnets: []*net.IPNet{}, 83 | }, peerName, nickName, mesh.NullOverlay{}, logger) 84 | 85 | // Create a meshconn.Peer and connect it to a channel. 86 | peer := meshconn.NewPeer(router.Ourself.Peer.Name, router.Ourself.UID, logger) 87 | gossip := router.NewGossip(channel, peer) 88 | peer.Register(gossip) 89 | 90 | // Start the router and join the mesh. 91 | // Note that we don't ever stop the router. 92 | // This may or may not be a problem. 93 | // TODO(pb): determine if this is a super huge problem 94 | router.Start() 95 | 96 | return NewServer(router, peer, minPeerCount, terminatec, terminatedc, logger) 97 | } 98 | 99 | func serverManager( 100 | router *mesh.Router, 101 | peer *meshconn.Peer, 102 | minPeerCount int, 103 | terminatec <-chan struct{}, 104 | terminatedc chan<- error, 105 | logger mesh.Logger, 106 | out chan<- Server, 107 | ) { 108 | // Identify mesh peers to either create or join a cluster. 109 | // This algorithm is presently completely insufficient. 110 | // It suffers from timing failures, and doesn't understand channels. 
111 | // TODO(pb): use gossip to agree on better starting conditions 112 | var ( 113 | self = meshconn.MeshAddr{PeerName: router.Ourself.Peer.Name, PeerUID: router.Ourself.UID} 114 | others = []net.Addr{} 115 | ) 116 | for { 117 | others = others[:0] 118 | for _, desc := range router.Peers.Descriptions() { 119 | others = append(others, meshconn.MeshAddr{PeerName: desc.Name, PeerUID: desc.UID}) 120 | } 121 | if len(others) == minPeerCount { 122 | logger.Printf("detected %d peers; creating", len(others)) 123 | break 124 | } else if len(others) > minPeerCount { 125 | logger.Printf("detected %d peers; joining", len(others)) 126 | others = others[:0] // empty others slice means join 127 | break 128 | } 129 | logger.Printf("detected %d peers; waiting...", len(others)) 130 | time.Sleep(time.Second) 131 | } 132 | 133 | var ( 134 | incomingc = make(chan raftpb.Message) // from meshconn to ctrl 135 | outgoingc = make(chan raftpb.Message) // from ctrl to meshconn 136 | unreachablec = make(chan uint64, 10000) // from meshconn to ctrl 137 | confchangec = make(chan raftpb.ConfChange) // from meshconn to ctrl 138 | snapshotc = make(chan raftpb.Snapshot) // from ctrl to state machine 139 | entryc = make(chan raftpb.Entry) // from ctrl to state 140 | confentryc = make(chan raftpb.Entry) // from state to configurator 141 | proposalc = make(chan []byte) // from state machine to ctrl 142 | removedc = make(chan struct{}) // from ctrl to us 143 | shrunkc = make(chan struct{}) // from membership to us 144 | ) 145 | 146 | // Create the thing that watches the cluster membership via the router. It 147 | // signals conf changes, and closes shrunkc when the cluster is too small. 148 | var ( 149 | addc = make(chan uint64) 150 | remc = make(chan uint64) 151 | ) 152 | m := newMembership(router, membershipSet(router), minPeerCount, addc, remc, shrunkc, logger) 153 | defer m.stop() 154 | 155 | // Create the thing that converts mesh membership changes to Raft ConfChange 156 | // proposals. 
157 | c := newConfigurator(addc, remc, confchangec, confentryc, logger) 158 | defer c.stop() 159 | 160 | // Create a packet transport, wrapping the meshconn.Peer. 161 | transport := newPacketTransport(peer, translateVia(router), incomingc, outgoingc, unreachablec, logger) 162 | defer transport.stop() 163 | 164 | // Create the API server. store.stop must go on the defer stack before 165 | // ctrl.stop so that the ctrl stops first. Otherwise, ctrl can deadlock 166 | // processing the last tick. 167 | store := newEtcdStore(proposalc, snapshotc, entryc, confentryc, logger) 168 | defer store.stop() 169 | 170 | // Create the controller, which drives the Raft node internally. 171 | ctrl := newCtrl(self, others, minPeerCount, incomingc, outgoingc, unreachablec, confchangec, snapshotc, entryc, proposalc, removedc, logger) 172 | defer ctrl.stop() 173 | 174 | // Return the store to the client. 175 | out <- store 176 | 177 | errc := make(chan error) 178 | go func() { 179 | <-terminatec 180 | errc <- fmt.Errorf("metcd server terminated by user request") 181 | }() 182 | go func() { 183 | <-removedc 184 | errc <- fmt.Errorf("the Raft peer was removed from the cluster") 185 | }() 186 | go func() { 187 | <-shrunkc 188 | errc <- fmt.Errorf("the Raft cluster got too small") 189 | }() 190 | terminatedc <- <-errc 191 | } 192 | 193 | func translateVia(router *mesh.Router) peerTranslator { 194 | return func(uid mesh.PeerUID) (mesh.PeerName, error) { 195 | for _, d := range router.Peers.Descriptions() { 196 | if d.UID == uid { 197 | return d.Name, nil 198 | } 199 | } 200 | return 0, fmt.Errorf("peer UID %x not known", uid) 201 | } 202 | } 203 | 204 | func mustPeerName() mesh.PeerName { 205 | peerName, err := mesh.PeerNameFromString(mustHardwareAddr()) 206 | if err != nil { 207 | panic(err) 208 | } 209 | return peerName 210 | } 211 | 212 | func mustHardwareAddr() string { 213 | ifaces, err := net.Interfaces() 214 | if err != nil { 215 | panic(err) 216 | } 217 | for _, iface := range ifaces 
{ 218 | if s := iface.HardwareAddr.String(); s != "" { 219 | return s 220 | } 221 | } 222 | panic("no valid network interfaces") 223 | } 224 | 225 | func mustHostname() string { 226 | hostname, err := os.Hostname() 227 | if err != nil { 228 | panic(err) 229 | } 230 | return hostname 231 | } 232 | -------------------------------------------------------------------------------- /examples/increment-only-counter/README.md: -------------------------------------------------------------------------------- 1 | # Increment-only counter 2 | 3 | This example implements an in-memory increment-only counter. 4 | This is a state-based CRDT, so the write operation is `incr()`. 5 | 6 | ## Demo 7 | 8 | Start several peers on the same host. 9 | Tell the second and subsequent peers to connect to the first one. 10 | 11 | ``` 12 | $ ./increment-only-counter -hwaddr 00:00:00:00:00:01 -nickname a -mesh :6001 -http :8001 & 13 | $ ./increment-only-counter -hwaddr 00:00:00:00:00:02 -nickname b -mesh :6002 -http :8002 -peer 127.0.0.1:6001 & 14 | $ ./increment-only-counter -hwaddr 00:00:00:00:00:03 -nickname c -mesh :6003 -http :8003 -peer 127.0.0.1:6001 & 15 | ``` 16 | 17 | Get current value using the HTTP API of any peer. 18 | 19 | ``` 20 | $ curl -Ss -XGET "http://localhost:8002/" 21 | get => 0 22 | ``` 23 | 24 | Increment the value: 25 | ``` 26 | $ curl -Ss -XPOST "http://localhost:8003/" 27 | incr => 1 28 | ``` 29 | 30 | Get current value from another peer: 31 | ``` 32 | $ curl -Ss -XGET "http://localhost:8001/" 33 | get => 1 34 | ``` 35 | Increment again: 36 | ``` 37 | $ curl -Ss -XPOST "http://localhost:8002/" 38 | incr => 2 39 | ``` 40 | And get current value from a different peer: 41 | ``` 42 | $ curl -Ss -XGET "http://localhost:8003/" 43 | get => 2 44 | ``` 45 | 46 | 47 | ## Implementation 48 | 49 | - [The state object](/examples/increment-only-counter/state.go) implements `GossipData`. 50 | - [The peer object](/examples/increment-only-counter/peer.go) implements `Gossiper`.
51 | - [The func main](/examples/increment-only-counter/main.go) wires the components together. 52 | -------------------------------------------------------------------------------- /examples/increment-only-counter/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "net" 9 | "net/http" 10 | "os" 11 | "os/signal" 12 | "sort" 13 | "strconv" 14 | "strings" 15 | "syscall" 16 | 17 | "github.com/weaveworks/mesh" 18 | ) 19 | 20 | func main() { 21 | peers := &stringset{} 22 | var ( 23 | httpListen = flag.String("http", ":8080", "HTTP listen address") 24 | meshListen = flag.String("mesh", net.JoinHostPort("0.0.0.0", strconv.Itoa(mesh.Port)), "mesh listen address") 25 | hwaddr = flag.String("hwaddr", mustHardwareAddr(), "MAC address, i.e. mesh peer ID") 26 | nickname = flag.String("nickname", mustHostname(), "peer nickname") 27 | password = flag.String("password", "", "password (optional)") 28 | channel = flag.String("channel", "default", "gossip channel name") 29 | ) 30 | flag.Var(peers, "peer", "initial peer (may be repeated)") 31 | flag.Parse() 32 | 33 | logger := log.New(os.Stderr, *nickname+"> ", log.LstdFlags) 34 | 35 | host, portStr, err := net.SplitHostPort(*meshListen) 36 | if err != nil { 37 | logger.Fatalf("mesh address: %s: %v", *meshListen, err) 38 | } 39 | port, err := strconv.Atoi(portStr) 40 | if err != nil { 41 | logger.Fatalf("mesh address: %s: %v", *meshListen, err) 42 | } 43 | 44 | name, err := mesh.PeerNameFromString(*hwaddr) 45 | if err != nil { 46 | logger.Fatalf("%s: %v", *hwaddr, err) 47 | } 48 | 49 | router, err := mesh.NewRouter(mesh.Config{ 50 | Host: host, 51 | Port: port, 52 | ProtocolMinVersion: mesh.ProtocolMinVersion, 53 | Password: []byte(*password), 54 | ConnLimit: 64, 55 | PeerDiscovery: true, 56 | TrustedSubnets: []*net.IPNet{}, 57 | }, name, *nickname, mesh.NullOverlay{}, log.New(ioutil.Discard, "", 0)) 58 | 59 | if 
err != nil { 60 | logger.Fatalf("Could not create router: %v", err) 61 | } 62 | 63 | peer := newPeer(name, logger) 64 | gossip, err := router.NewGossip(*channel, peer) 65 | if err != nil { 66 | logger.Fatalf("Could not create gossip: %v", err) 67 | } 68 | 69 | peer.register(gossip) 70 | 71 | func() { 72 | logger.Printf("mesh router starting (%s)", *meshListen) 73 | router.Start() 74 | }() 75 | defer func() { 76 | logger.Printf("mesh router stopping") 77 | router.Stop() 78 | }() 79 | 80 | router.ConnectionMaker.InitiateConnections(peers.slice(), true) 81 | 82 | errs := make(chan error) 83 | go func() { 84 | c := make(chan os.Signal) 85 | signal.Notify(c, syscall.SIGINT) 86 | errs <- fmt.Errorf("%s", <-c) 87 | }() 88 | go func() { 89 | logger.Printf("HTTP server starting (%s)", *httpListen) 90 | http.HandleFunc("/", handle(peer)) 91 | errs <- http.ListenAndServe(*httpListen, nil) 92 | }() 93 | logger.Print(<-errs) 94 | } 95 | 96 | type counter interface { 97 | get() int 98 | incr() int 99 | } 100 | 101 | func handle(c counter) http.HandlerFunc { 102 | return func(w http.ResponseWriter, r *http.Request) { 103 | switch r.Method { 104 | case "GET": 105 | fmt.Fprintf(w, "get => %d\n", c.get()) 106 | 107 | case "POST": 108 | fmt.Fprintf(w, "incr => %d\n", c.incr()) 109 | } 110 | } 111 | } 112 | 113 | type stringset map[string]struct{} 114 | 115 | func (ss stringset) Set(value string) error { 116 | ss[value] = struct{}{} 117 | return nil 118 | } 119 | 120 | func (ss stringset) String() string { 121 | return strings.Join(ss.slice(), ",") 122 | } 123 | 124 | func (ss stringset) slice() []string { 125 | slice := make([]string, 0, len(ss)) 126 | for k := range ss { 127 | slice = append(slice, k) 128 | } 129 | sort.Strings(slice) 130 | return slice 131 | } 132 | 133 | func mustHardwareAddr() string { 134 | ifaces, err := net.Interfaces() 135 | if err != nil { 136 | panic(err) 137 | } 138 | for _, iface := range ifaces { 139 | if s := iface.HardwareAddr.String(); s != "" { 140 | 
return s 141 | } 142 | } 143 | panic("no valid network interfaces") 144 | } 145 | 146 | func mustHostname() string { 147 | hostname, err := os.Hostname() 148 | if err != nil { 149 | panic(err) 150 | } 151 | return hostname 152 | } 153 | -------------------------------------------------------------------------------- /examples/increment-only-counter/peer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | 6 | "bytes" 7 | "encoding/gob" 8 | 9 | "github.com/weaveworks/mesh" 10 | ) 11 | 12 | // Peer encapsulates state and implements mesh.Gossiper. 13 | // It should be passed to mesh.Router.NewGossip, 14 | // and the resulting Gossip registered in turn, 15 | // before calling mesh.Router.Start. 16 | type peer struct { 17 | st *state 18 | send mesh.Gossip 19 | actions chan<- func() 20 | quit chan struct{} 21 | logger *log.Logger 22 | } 23 | 24 | // peer implements mesh.Gossiper. 25 | var _ mesh.Gossiper = &peer{} 26 | 27 | // Construct a peer with empty state. 28 | // Be sure to register a channel, later, 29 | // so we can make outbound communication. 30 | func newPeer(self mesh.PeerName, logger *log.Logger) *peer { 31 | actions := make(chan func()) 32 | p := &peer{ 33 | st: newState(self), 34 | send: nil, // must .register() later 35 | actions: actions, 36 | quit: make(chan struct{}), 37 | logger: logger, 38 | } 39 | go p.loop(actions) 40 | return p 41 | } 42 | 43 | func (p *peer) loop(actions <-chan func()) { 44 | for { 45 | select { 46 | case f := <-actions: 47 | f() 48 | case <-p.quit: 49 | return 50 | } 51 | } 52 | } 53 | 54 | // register the result of a mesh.Router.NewGossip. 55 | func (p *peer) register(send mesh.Gossip) { 56 | p.actions <- func() { p.send = send } 57 | } 58 | 59 | // Return the current value of the counter. 60 | func (p *peer) get() int { 61 | return p.st.get() 62 | } 63 | 64 | // Increment the counter by one. 
65 | func (p *peer) incr() (result int) { 66 | c := make(chan struct{}) 67 | p.actions <- func() { 68 | defer close(c) 69 | st := p.st.incr() 70 | if p.send != nil { 71 | p.send.GossipBroadcast(st) 72 | } else { 73 | p.logger.Printf("no sender configured; not broadcasting update right now") 74 | } 75 | result = st.get() 76 | } 77 | <-c 78 | return result 79 | } 80 | 81 | func (p *peer) stop() { 82 | close(p.quit) 83 | } 84 | 85 | // Return a copy of our complete state. 86 | func (p *peer) Gossip() (complete mesh.GossipData) { 87 | complete = p.st.copy() 88 | p.logger.Printf("Gossip => complete %v", complete.(*state).set) 89 | return complete 90 | } 91 | 92 | // Merge the gossiped data represented by buf into our state. 93 | // Return the state information that was modified. 94 | func (p *peer) OnGossip(buf []byte) (delta mesh.GossipData, err error) { 95 | var set map[mesh.PeerName]int 96 | if err := gob.NewDecoder(bytes.NewReader(buf)).Decode(&set); err != nil { 97 | return nil, err 98 | } 99 | 100 | delta = p.st.mergeDelta(set) 101 | if delta == nil { 102 | p.logger.Printf("OnGossip %v => delta %v", set, delta) 103 | } else { 104 | p.logger.Printf("OnGossip %v => delta %v", set, delta.(*state).set) 105 | } 106 | return delta, nil 107 | } 108 | 109 | // Merge the gossiped data represented by buf into our state. 110 | // Return the state information that was modified. 
111 | func (p *peer) OnGossipBroadcast(src mesh.PeerName, buf []byte) (received mesh.GossipData, err error) { 112 | var set map[mesh.PeerName]int 113 | if err := gob.NewDecoder(bytes.NewReader(buf)).Decode(&set); err != nil { 114 | return nil, err 115 | } 116 | 117 | received = p.st.mergeReceived(set) 118 | if received == nil { 119 | p.logger.Printf("OnGossipBroadcast %s %v => delta %v", src, set, received) 120 | } else { 121 | p.logger.Printf("OnGossipBroadcast %s %v => delta %v", src, set, received.(*state).set) 122 | } 123 | return received, nil 124 | } 125 | 126 | // Merge the gossiped data represented by buf into our state. 127 | func (p *peer) OnGossipUnicast(src mesh.PeerName, buf []byte) error { 128 | var set map[mesh.PeerName]int 129 | if err := gob.NewDecoder(bytes.NewReader(buf)).Decode(&set); err != nil { 130 | return err 131 | } 132 | 133 | complete := p.st.mergeComplete(set) 134 | p.logger.Printf("OnGossipUnicast %s %v => complete %v", src, set, complete) 135 | return nil 136 | } 137 | -------------------------------------------------------------------------------- /examples/increment-only-counter/peer_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "io/ioutil" 7 | "log" 8 | "reflect" 9 | "testing" 10 | 11 | "github.com/weaveworks/mesh" 12 | ) 13 | 14 | func TestPeerOnGossip(t *testing.T) { 15 | for _, testcase := range []struct { 16 | initial map[mesh.PeerName]int 17 | msg map[mesh.PeerName]int 18 | want map[mesh.PeerName]int 19 | }{ 20 | { 21 | map[mesh.PeerName]int{}, 22 | map[mesh.PeerName]int{123: 1, 456: 2}, 23 | map[mesh.PeerName]int{123: 1, 456: 2}, 24 | }, 25 | { 26 | map[mesh.PeerName]int{123: 1}, 27 | map[mesh.PeerName]int{123: 0, 456: 2}, 28 | map[mesh.PeerName]int{456: 2}, 29 | }, 30 | { 31 | map[mesh.PeerName]int{123: 9}, 32 | map[mesh.PeerName]int{123: 8}, 33 | nil, 34 | }, 35 | } { 36 | p := newPeer(mesh.PeerName(999), 
log.New(ioutil.Discard, "", 0)) 37 | p.st.mergeComplete(testcase.initial) 38 | var buf bytes.Buffer 39 | if err := gob.NewEncoder(&buf).Encode(testcase.msg); err != nil { 40 | t.Fatal(err) 41 | } 42 | delta, err := p.OnGossip(buf.Bytes()) 43 | if err != nil { 44 | t.Errorf("%v OnGossip %v: %v", testcase.initial, testcase.msg, err) 45 | continue 46 | } 47 | if want := testcase.want; want == nil { 48 | if delta != nil { 49 | t.Errorf("%v OnGossip %v: want nil, have non-nil", testcase.initial, testcase.msg) 50 | } 51 | } else { 52 | if have := delta.(*state).set; !reflect.DeepEqual(want, have) { 53 | t.Errorf("%v OnGossip %v: want %v, have %v", testcase.initial, testcase.msg, want, have) 54 | } 55 | } 56 | } 57 | } 58 | 59 | func TestPeerOnGossipBroadcast(t *testing.T) { 60 | for _, testcase := range []struct { 61 | initial map[mesh.PeerName]int 62 | msg map[mesh.PeerName]int 63 | want map[mesh.PeerName]int 64 | }{ 65 | { 66 | map[mesh.PeerName]int{}, 67 | map[mesh.PeerName]int{123: 1, 456: 2}, 68 | map[mesh.PeerName]int{123: 1, 456: 2}, 69 | }, 70 | { 71 | map[mesh.PeerName]int{123: 1}, 72 | map[mesh.PeerName]int{123: 0, 456: 2}, 73 | map[mesh.PeerName]int{456: 2}, 74 | }, 75 | { 76 | map[mesh.PeerName]int{123: 9}, 77 | map[mesh.PeerName]int{123: 8}, 78 | map[mesh.PeerName]int{}, // OnGossipBroadcast returns received, which should never be nil 79 | }, 80 | } { 81 | p := newPeer(999, log.New(ioutil.Discard, "", 0)) 82 | p.st.mergeComplete(testcase.initial) 83 | var buf bytes.Buffer 84 | if err := gob.NewEncoder(&buf).Encode(testcase.msg); err != nil { 85 | t.Fatal(err) 86 | } 87 | delta, err := p.OnGossipBroadcast(mesh.UnknownPeerName, buf.Bytes()) 88 | if err != nil { 89 | t.Errorf("%v OnGossipBroadcast %v: %v", testcase.initial, testcase.msg, err) 90 | continue 91 | } 92 | if want, have := testcase.want, delta.(*state).set; !reflect.DeepEqual(want, have) { 93 | t.Errorf("%v OnGossipBroadcast %v: want %v, have %v", testcase.initial, testcase.msg, want, have) 94 | } 
95 | } 96 | } 97 | 98 | func TestPeerOnGossipUnicast(t *testing.T) { 99 | for _, testcase := range []struct { 100 | initial map[mesh.PeerName]int 101 | msg map[mesh.PeerName]int 102 | want map[mesh.PeerName]int 103 | }{ 104 | { 105 | map[mesh.PeerName]int{}, 106 | map[mesh.PeerName]int{123: 1, 456: 2}, 107 | map[mesh.PeerName]int{123: 1, 456: 2}, 108 | }, 109 | { 110 | map[mesh.PeerName]int{123: 1}, 111 | map[mesh.PeerName]int{123: 0, 456: 2}, 112 | map[mesh.PeerName]int{123: 1, 456: 2}, 113 | }, 114 | { 115 | map[mesh.PeerName]int{123: 9}, 116 | map[mesh.PeerName]int{123: 8}, 117 | map[mesh.PeerName]int{123: 9}, 118 | }, 119 | } { 120 | p := newPeer(999, log.New(ioutil.Discard, "", 0)) 121 | p.st.mergeComplete(testcase.initial) 122 | var buf bytes.Buffer 123 | if err := gob.NewEncoder(&buf).Encode(testcase.msg); err != nil { 124 | t.Fatal(err) 125 | } 126 | if err := p.OnGossipUnicast(mesh.UnknownPeerName, buf.Bytes()); err != nil { 127 | t.Errorf("%v OnGossipBroadcast %v: %v", testcase.initial, testcase.msg, err) 128 | continue 129 | } 130 | if want, have := testcase.want, p.st.set; !reflect.DeepEqual(want, have) { 131 | t.Errorf("%v OnGossipBroadcast %v: want %v, have %v", testcase.initial, testcase.msg, want, have) 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /examples/increment-only-counter/state.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "sync" 6 | 7 | "encoding/gob" 8 | 9 | "github.com/weaveworks/mesh" 10 | ) 11 | 12 | // state is an implementation of a G-counter. 13 | type state struct { 14 | mtx sync.RWMutex 15 | set map[mesh.PeerName]int 16 | self mesh.PeerName 17 | } 18 | 19 | // state implements GossipData. 20 | var _ mesh.GossipData = &state{} 21 | 22 | // Construct an empty state object, ready to receive updates. 23 | // This is suitable to use at program start. 
24 | // Other peers will populate us with data. 25 | func newState(self mesh.PeerName) *state { 26 | return &state{ 27 | set: map[mesh.PeerName]int{}, 28 | self: self, 29 | } 30 | } 31 | 32 | func (st *state) get() (result int) { 33 | st.mtx.RLock() 34 | defer st.mtx.RUnlock() 35 | for _, v := range st.set { 36 | result += v 37 | } 38 | return result 39 | } 40 | 41 | func (st *state) incr() (complete *state) { 42 | st.mtx.Lock() 43 | defer st.mtx.Unlock() 44 | st.set[st.self]++ 45 | return &state{ 46 | set: st.set, 47 | } 48 | } 49 | 50 | func (st *state) copy() *state { 51 | st.mtx.RLock() 52 | defer st.mtx.RUnlock() 53 | return &state{ 54 | set: st.set, 55 | } 56 | } 57 | 58 | // Encode serializes our complete state to a slice of byte-slices. 59 | // In this simple example, we use a single gob-encoded 60 | // buffer: see https://golang.org/pkg/encoding/gob/ 61 | func (st *state) Encode() [][]byte { 62 | st.mtx.RLock() 63 | defer st.mtx.RUnlock() 64 | var buf bytes.Buffer 65 | if err := gob.NewEncoder(&buf).Encode(st.set); err != nil { 66 | panic(err) 67 | } 68 | return [][]byte{buf.Bytes()} 69 | } 70 | 71 | // Merge merges the other GossipData into this one, 72 | // and returns our resulting, complete state. 73 | func (st *state) Merge(other mesh.GossipData) (complete mesh.GossipData) { 74 | return st.mergeComplete(other.(*state).copy().set) 75 | } 76 | 77 | // Merge the set into our state, abiding increment-only semantics. 78 | // Return a non-nil mesh.GossipData representation of the received set. 
79 | func (st *state) mergeReceived(set map[mesh.PeerName]int) (received mesh.GossipData) { 80 | st.mtx.Lock() 81 | defer st.mtx.Unlock() 82 | 83 | for peer, v := range set { 84 | if v <= st.set[peer] { 85 | delete(set, peer) // optimization: make the forwarded data smaller 86 | continue 87 | } 88 | st.set[peer] = v 89 | } 90 | 91 | return &state{ 92 | set: set, // all remaining elements were novel to us 93 | } 94 | } 95 | 96 | // Merge the set into our state, abiding increment-only semantics. 97 | // Return any key/values that have been mutated, or nil if nothing changed. 98 | func (st *state) mergeDelta(set map[mesh.PeerName]int) (delta mesh.GossipData) { 99 | st.mtx.Lock() 100 | defer st.mtx.Unlock() 101 | 102 | for peer, v := range set { 103 | if v <= st.set[peer] { 104 | delete(set, peer) // requirement: it's not part of a delta 105 | continue 106 | } 107 | st.set[peer] = v 108 | } 109 | 110 | if len(set) <= 0 { 111 | return nil // per OnGossip requirements 112 | } 113 | return &state{ 114 | set: set, // all remaining elements were novel to us 115 | } 116 | } 117 | 118 | // Merge the set into our state, abiding increment-only semantics. 119 | // Return our resulting, complete state. 120 | func (st *state) mergeComplete(set map[mesh.PeerName]int) (complete mesh.GossipData) { 121 | st.mtx.Lock() 122 | defer st.mtx.Unlock() 123 | 124 | for peer, v := range set { 125 | if v > st.set[peer] { 126 | st.set[peer] = v 127 | } 128 | } 129 | 130 | return &state{ 131 | set: st.set, // n.b. 
can't .copy() due to lock contention 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /examples/increment-only-counter/state_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | "github.com/weaveworks/mesh" 8 | ) 9 | 10 | func TestStateMergeReceived(t *testing.T) { 11 | for _, testcase := range []struct { 12 | initial map[mesh.PeerName]int 13 | merge map[mesh.PeerName]int 14 | want map[mesh.PeerName]int 15 | }{ 16 | { 17 | map[mesh.PeerName]int{}, 18 | map[mesh.PeerName]int{123: 1, 456: 2}, 19 | map[mesh.PeerName]int{123: 1, 456: 2}, 20 | }, 21 | { 22 | map[mesh.PeerName]int{123: 1, 456: 2}, 23 | map[mesh.PeerName]int{123: 1, 456: 2}, 24 | map[mesh.PeerName]int{}, 25 | }, 26 | { 27 | map[mesh.PeerName]int{123: 1, 456: 2}, 28 | map[mesh.PeerName]int{789: 3}, 29 | map[mesh.PeerName]int{789: 3}, 30 | }, 31 | { 32 | map[mesh.PeerName]int{456: 3}, 33 | map[mesh.PeerName]int{123: 1, 456: 2}, 34 | map[mesh.PeerName]int{123: 1}, // we drop keys that don't semantically merge 35 | }, 36 | } { 37 | initial, merge := testcase.initial, testcase.merge // mergeReceived modifies arguments 38 | delta := newState(999).mergeComplete(initial).(*state).mergeReceived(merge) 39 | if want, have := testcase.want, delta.(*state).set; !reflect.DeepEqual(want, have) { 40 | t.Errorf("%v mergeReceived %v: want %v, have %v", testcase.initial, testcase.merge, want, have) 41 | } 42 | } 43 | } 44 | 45 | func TestStateMergeDelta(t *testing.T) { 46 | for _, testcase := range []struct { 47 | initial map[mesh.PeerName]int 48 | merge map[mesh.PeerName]int 49 | want map[mesh.PeerName]int 50 | }{ 51 | { 52 | map[mesh.PeerName]int{}, 53 | map[mesh.PeerName]int{123: 1, 456: 2}, 54 | map[mesh.PeerName]int{123: 1, 456: 2}, 55 | }, 56 | { 57 | map[mesh.PeerName]int{123: 1, 456: 2}, 58 | map[mesh.PeerName]int{123: 1, 456: 2}, 59 | nil, 60 | }, 61 | { 62 
| map[mesh.PeerName]int{123: 1, 456: 2}, 63 | map[mesh.PeerName]int{789: 3}, 64 | map[mesh.PeerName]int{789: 3}, 65 | }, 66 | { 67 | map[mesh.PeerName]int{123: 1, 456: 2}, 68 | map[mesh.PeerName]int{456: 3}, 69 | map[mesh.PeerName]int{456: 3}, 70 | }, 71 | } { 72 | initial, merge := testcase.initial, testcase.merge // mergeDelta modifies arguments 73 | delta := newState(999).mergeComplete(initial).(*state).mergeDelta(merge) 74 | if want := testcase.want; want == nil { 75 | if delta != nil { 76 | t.Errorf("%v mergeDelta %v: want nil, have non-nil", testcase.initial, testcase.merge) 77 | } 78 | } else { 79 | if have := delta.(*state).set; !reflect.DeepEqual(want, have) { 80 | t.Errorf("%v mergeDelta %v: want %v, have %v", testcase.initial, testcase.merge, want, have) 81 | } 82 | } 83 | } 84 | } 85 | 86 | func TestStateMergeComplete(t *testing.T) { 87 | for _, testcase := range []struct { 88 | initial map[mesh.PeerName]int 89 | merge map[mesh.PeerName]int 90 | want map[mesh.PeerName]int 91 | }{ 92 | { 93 | map[mesh.PeerName]int{}, 94 | map[mesh.PeerName]int{123: 1, 456: 2}, 95 | map[mesh.PeerName]int{123: 1, 456: 2}, 96 | }, 97 | { 98 | map[mesh.PeerName]int{123: 1, 456: 2}, 99 | map[mesh.PeerName]int{123: 1, 456: 2}, 100 | map[mesh.PeerName]int{123: 1, 456: 2}, 101 | }, 102 | { 103 | map[mesh.PeerName]int{123: 1, 456: 2}, 104 | map[mesh.PeerName]int{789: 3}, 105 | map[mesh.PeerName]int{123: 1, 456: 2, 789: 3}, 106 | }, 107 | { 108 | map[mesh.PeerName]int{123: 1, 456: 2}, 109 | map[mesh.PeerName]int{123: 0, 456: 3}, 110 | map[mesh.PeerName]int{123: 1, 456: 3}, 111 | }, 112 | } { 113 | st := newState(999).mergeComplete(testcase.initial).(*state).mergeComplete(testcase.merge).(*state) 114 | if want, have := testcase.want, st.set; !reflect.DeepEqual(want, have) { 115 | t.Errorf("%v mergeComplete %v: want %v, have %v", testcase.initial, testcase.merge, want, have) 116 | } 117 | } 118 | } 119 | 
-------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/weaveworks/mesh 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/stretchr/testify v1.4.0 7 | golang.org/x/crypto v0.0.0-20191002192127-34f69633bfdc 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 6 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 7 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 8 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 9 | golang.org/x/crypto v0.0.0-20191002192127-34f69633bfdc h1:c0o/qxkaO2LF5t6fQrT4b5hzyggAkLLlCUjqfRxd8Q4= 10 | golang.org/x/crypto v0.0.0-20191002192127-34f69633bfdc/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 11 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 12 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 13 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 14 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 16 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 17 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 18 | -------------------------------------------------------------------------------- /gossip.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import "sync" 4 | 5 | // Gossip is the sending interface. 6 | // 7 | // TODO(pb): rename to e.g. Sender 8 | type Gossip interface { 9 | // GossipUnicast emits a single message to a peer in the mesh. 10 | // 11 | // TODO(pb): rename to Unicast? 12 | // 13 | // Unicast takes []byte instead of GossipData because "to date there has 14 | // been no compelling reason [in practice] to do merging on unicast." 15 | // But there may be some motivation to have unicast Mergeable; see 16 | // https://github.com/weaveworks/weave/issues/1764 17 | // 18 | // TODO(pb): for uniformity of interface, rather take GossipData? 19 | GossipUnicast(dst PeerName, msg []byte) error 20 | 21 | // GossipBroadcast emits a message to all peers in the mesh. 22 | // 23 | // TODO(pb): rename to Broadcast? 24 | GossipBroadcast(update GossipData) 25 | 26 | // GossipNeighbourSubset emits a message to subset of neighbour peers in the mesh. 27 | GossipNeighbourSubset(update GossipData) 28 | } 29 | 30 | // Gossiper is the receiving interface. 31 | // 32 | // TODO(pb): rename to e.g. Receiver 33 | type Gossiper interface { 34 | // OnGossipUnicast merges received data into state. 35 | // 36 | // TODO(pb): rename to e.g. OnUnicast 37 | OnGossipUnicast(src PeerName, msg []byte) error 38 | 39 | // OnGossipBroadcast merges received data into state and returns a 40 | // representation of the received data (typically a delta) for further 41 | // propagation. 42 | // 43 | // TODO(pb): rename to e.g. 
OnBroadcast 44 | OnGossipBroadcast(src PeerName, update []byte) (received GossipData, err error) 45 | 46 | // Gossip returns the state of everything we know; gets called periodically. 47 | Gossip() (complete GossipData) 48 | 49 | // OnGossip merges received data into state and returns "everything new 50 | // I've just learnt", or nil if nothing in the received data was new. 51 | OnGossip(msg []byte) (delta GossipData, err error) 52 | } 53 | 54 | // GossipData is a merge-able dataset. 55 | // Think: log-structured data. 56 | type GossipData interface { 57 | // Encode encodes the data into multiple byte-slices. 58 | Encode() [][]byte 59 | 60 | // Merge combines another GossipData into this one and returns the result. 61 | // 62 | // TODO(pb): does it need to be leave the original unmodified? 63 | Merge(GossipData) GossipData 64 | } 65 | 66 | // GossipSender accumulates GossipData that needs to be sent to one 67 | // destination, and sends it when possible. GossipSender is one-to-one with a 68 | // channel. 69 | type gossipSender struct { 70 | sync.Mutex 71 | makeMsg func(msg []byte) protocolMsg 72 | makeBroadcastMsg func(srcName PeerName, msg []byte) protocolMsg 73 | sender protocolSender 74 | gossip GossipData 75 | broadcasts map[PeerName]GossipData 76 | more chan<- struct{} 77 | flush chan<- chan<- bool // for testing 78 | } 79 | 80 | // NewGossipSender constructs a usable GossipSender. 
func newGossipSender(
	makeMsg func(msg []byte) protocolMsg,
	makeBroadcastMsg func(srcName PeerName, msg []byte) protocolMsg,
	sender protocolSender,
	stop <-chan struct{},
) *gossipSender {
	// more has capacity 1: a single pending wake-up is enough, because
	// deliver drains everything that has accumulated each time it runs.
	more := make(chan struct{}, 1)
	flush := make(chan chan<- bool)
	s := &gossipSender{
		makeMsg:          makeMsg,
		makeBroadcastMsg: makeBroadcastMsg,
		sender:           sender,
		broadcasts:       make(map[PeerName]GossipData),
		more:             more,
		flush:            flush,
	}
	// The struct keeps only the send ends of more/flush; the receive ends
	// are handed to the run goroutine.
	go s.run(stop, more, flush)
	return s
}

// run is the sender's goroutine. It delivers pending data whenever it is
// woken via more, answers flush requests, and exits when stop becomes
// readable or when deliver reports an error.
func (s *gossipSender) run(stop <-chan struct{}, more <-chan struct{}, flush <-chan chan<- bool) {
	// sent records whether anything has been delivered since the last flush.
	sent := false
	for {
		select {
		case <-stop:
			return
		case <-more:
			sentSomething, err := s.deliver(stop)
			if err != nil {
				// The error is not reported anywhere; the goroutine just ends.
				return
			}
			sent = sent || sentSomething
		case ch := <-flush: // for testing
			// send anything pending, then reply back whether we sent
			// anything since previous flush
			select {
			case <-more:
				sentSomething, err := s.deliver(stop)
				if err != nil {
					// NOTE(review): returning here means ch never gets a
					// reply, so the Flush caller stays blocked.
					return
				}
				sent = sent || sentSomething
			default:
			}
			ch <- sent
			sent = false
		}
	}
}

// deliver sends accumulated data until pick has nothing left, stop becomes
// readable, or a send fails. It returns whether at least one piece of data
// was sent in full, together with the first send error, if any.
func (s *gossipSender) deliver(stop <-chan struct{}) (bool, error) {
	sent := false
	// We must not hold our lock when sending, since that would block
	// the callers of Send/Broadcast while we are stuck waiting for
	// network congestion to clear. So we pick and send one piece of
	// data at a time, only holding the lock during the picking.
	for {
		// Non-blocking check so a stop request is noticed between pieces.
		select {
		case <-stop:
			return sent, nil
		default:
		}
		data, makeProtocolMsg := s.pick()
		if data == nil {
			return sent, nil
		}
		// One GossipData may encode to several byte-slices; each goes out
		// as its own protocol message.
		for _, msg := range data.Encode() {
			if err := s.sender.SendProtocolMsg(makeProtocolMsg(msg)); err != nil {
				return sent, err
			}
		}
		sent = true
	}
}

// pick removes one pending piece of data under the lock and returns it with
// the function that wraps its encoded bytes into a protocolMsg. Pending
// gossip is taken before any broadcast. Both results are nil when nothing
// is pending.
func (s *gossipSender) pick() (data GossipData, makeProtocolMsg func(msg []byte) protocolMsg) {
	s.Lock()
	defer s.Unlock()
	switch {
	case s.gossip != nil: // usually more important than broadcasts
		data = s.gossip
		makeProtocolMsg = s.makeMsg
		s.gossip = nil
	case len(s.broadcasts) > 0:
		// Take a single entry; the loop breaks after its first iteration,
		// so which source gets picked follows map iteration order.
		for srcName, d := range s.broadcasts {
			data = d
			makeProtocolMsg = func(msg []byte) protocolMsg { return s.makeBroadcastMsg(srcName, msg) }
			delete(s.broadcasts, srcName)
			break
		}
	}
	return
}

// Send accumulates the GossipData and will send it eventually.
// Send and Broadcast accumulate into different buckets.
func (s *gossipSender) Send(data GossipData) {
	s.Lock()
	defer s.Unlock()
	// Only a transition from empty to non-empty needs a wake-up. The
	// deferred prod runs before the deferred Unlock (defers run last-in,
	// first-out), i.e. while the lock is still held.
	if s.empty() {
		defer s.prod()
	}
	if s.gossip == nil {
		s.gossip = data
	} else {
		s.gossip = s.gossip.Merge(data)
	}
}

// Broadcast accumulates the GossipData under the given srcName and will send
// it eventually. Send and Broadcast accumulate into different buckets.
192 | func (s *gossipSender) Broadcast(srcName PeerName, data GossipData) { 193 | s.Lock() 194 | defer s.Unlock() 195 | if s.empty() { 196 | defer s.prod() 197 | } 198 | d, found := s.broadcasts[srcName] 199 | if !found { 200 | s.broadcasts[srcName] = data 201 | } else { 202 | s.broadcasts[srcName] = d.Merge(data) 203 | } 204 | } 205 | 206 | func (s *gossipSender) empty() bool { return s.gossip == nil && len(s.broadcasts) == 0 } 207 | 208 | func (s *gossipSender) prod() { 209 | select { 210 | case s.more <- struct{}{}: 211 | default: 212 | } 213 | } 214 | 215 | // Flush sends all pending data, and returns true if anything was sent since 216 | // the previous flush. For testing. 217 | func (s *gossipSender) Flush() bool { 218 | ch := make(chan bool) 219 | s.flush <- ch 220 | return <-ch 221 | } 222 | 223 | // gossipSenders wraps a ProtocolSender (e.g. a LocalConnection) and yields 224 | // per-channel GossipSenders. 225 | // TODO(pb): may be able to remove this and use makeGossipSender directly 226 | type gossipSenders struct { 227 | sync.Mutex 228 | sender protocolSender 229 | stop <-chan struct{} 230 | senders map[string]*gossipSender 231 | } 232 | 233 | // NewGossipSenders returns a usable GossipSenders leveraging the ProtocolSender. 234 | // TODO(pb): is stop chan the best way to do that? 235 | func newGossipSenders(sender protocolSender, stop <-chan struct{}) *gossipSenders { 236 | return &gossipSenders{ 237 | sender: sender, 238 | stop: stop, 239 | senders: make(map[string]*gossipSender), 240 | } 241 | } 242 | 243 | // Sender yields the GossipSender for the named channel. 244 | // It will use the factory function if no sender yet exists. 
245 | func (gs *gossipSenders) Sender(channelName string, makeGossipSender func(sender protocolSender, stop <-chan struct{}) *gossipSender) *gossipSender { 246 | gs.Lock() 247 | defer gs.Unlock() 248 | s, found := gs.senders[channelName] 249 | if !found { 250 | s = makeGossipSender(gs.sender, gs.stop) 251 | gs.senders[channelName] = s 252 | } 253 | return s 254 | } 255 | 256 | // Flush flushes all managed senders. Used for testing. 257 | func (gs *gossipSenders) Flush() bool { 258 | sent := false 259 | gs.Lock() 260 | defer gs.Unlock() 261 | for _, sender := range gs.senders { 262 | sent = sender.Flush() || sent 263 | } 264 | return sent 265 | } 266 | 267 | // GossipChannels is an index of channel name to gossip channel. 268 | type gossipChannels map[string]*gossipChannel 269 | 270 | type gossipConnection interface { 271 | gossipSenders() *gossipSenders 272 | } 273 | -------------------------------------------------------------------------------- /gossip_channel.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "fmt" 7 | ) 8 | 9 | // gossipChannel is a logical communication channel within a physical mesh. 10 | type gossipChannel struct { 11 | name string 12 | ourself *localPeer 13 | routes *routes 14 | gossiper Gossiper 15 | logger Logger 16 | } 17 | 18 | // newGossipChannel returns a named, usable channel. 19 | // It delegates receiving duties to the passed Gossiper. 
20 | func newGossipChannel(channelName string, ourself *localPeer, r *routes, g Gossiper, logger Logger) *gossipChannel { 21 | return &gossipChannel{ 22 | name: channelName, 23 | ourself: ourself, 24 | routes: r, 25 | gossiper: g, 26 | logger: logger, 27 | } 28 | } 29 | 30 | func (c *gossipChannel) deliverUnicast(srcName PeerName, origPayload []byte, dec *gob.Decoder) error { 31 | var destName PeerName 32 | if err := dec.Decode(&destName); err != nil { 33 | return err 34 | } 35 | if c.ourself.Name == destName { 36 | var payload []byte 37 | if err := dec.Decode(&payload); err != nil { 38 | return err 39 | } 40 | return c.gossiper.OnGossipUnicast(srcName, payload) 41 | } 42 | if err := c.relayUnicast(destName, origPayload); err != nil { 43 | c.logf("%v", err) 44 | } 45 | return nil 46 | } 47 | 48 | func (c *gossipChannel) deliverBroadcast(srcName PeerName, _ []byte, dec *gob.Decoder) error { 49 | var payload []byte 50 | if err := dec.Decode(&payload); err != nil { 51 | return err 52 | } 53 | data, err := c.gossiper.OnGossipBroadcast(srcName, payload) 54 | if err != nil || data == nil { 55 | return err 56 | } 57 | c.relayBroadcast(srcName, data) 58 | return nil 59 | } 60 | 61 | func (c *gossipChannel) deliver(srcName PeerName, _ []byte, dec *gob.Decoder) error { 62 | var payload []byte 63 | if err := dec.Decode(&payload); err != nil { 64 | return err 65 | } 66 | update, err := c.gossiper.OnGossip(payload) 67 | if err != nil || update == nil { 68 | return err 69 | } 70 | c.relay(srcName, update) 71 | return nil 72 | } 73 | 74 | // GossipUnicast implements Gossip, relaying msg to dst, which must be a 75 | // member of the channel. 76 | func (c *gossipChannel) GossipUnicast(dstPeerName PeerName, msg []byte) error { 77 | return c.relayUnicast(dstPeerName, gobEncode(c.name, c.ourself.Name, dstPeerName, msg)) 78 | } 79 | 80 | // GossipBroadcast implements Gossip, relaying update to all members of the 81 | // channel. 
82 | func (c *gossipChannel) GossipBroadcast(update GossipData) { 83 | c.relayBroadcast(c.ourself.Name, update) 84 | } 85 | 86 | // GossipNeighbourSubset implements Gossip, relaying update to subset of members of the 87 | // channel. 88 | func (c *gossipChannel) GossipNeighbourSubset(update GossipData) { 89 | c.relay(c.ourself.Name, update) 90 | } 91 | 92 | // Send relays data into the channel topology via random neighbours. 93 | func (c *gossipChannel) Send(data GossipData) { 94 | c.relay(c.ourself.Name, data) 95 | } 96 | 97 | // SendDown relays data into the channel topology via conn. 98 | func (c *gossipChannel) SendDown(conn Connection, data GossipData) { 99 | c.senderFor(conn).Send(data) 100 | } 101 | 102 | func (c *gossipChannel) relayUnicast(dstPeerName PeerName, buf []byte) (err error) { 103 | if relayPeerName, found := c.routes.UnicastAll(dstPeerName); !found { 104 | err = fmt.Errorf("unknown relay destination: %s", dstPeerName) 105 | } else if conn, found := c.ourself.ConnectionTo(relayPeerName); !found { 106 | err = fmt.Errorf("unable to find connection to relay peer %s", relayPeerName) 107 | } else { 108 | err = conn.(protocolSender).SendProtocolMsg(protocolMsg{ProtocolGossipUnicast, buf}) 109 | } 110 | return err 111 | } 112 | 113 | func (c *gossipChannel) relayBroadcast(srcName PeerName, update GossipData) { 114 | c.routes.ensureRecalculated() 115 | for _, conn := range c.ourself.ConnectionsTo(c.routes.BroadcastAll(srcName)) { 116 | c.senderFor(conn).Broadcast(srcName, update) 117 | } 118 | } 119 | 120 | func (c *gossipChannel) relay(srcName PeerName, data GossipData) { 121 | c.routes.ensureRecalculated() 122 | for _, conn := range c.ourself.ConnectionsTo(c.routes.randomNeighbours(srcName)) { 123 | c.senderFor(conn).Send(data) 124 | } 125 | } 126 | 127 | func (c *gossipChannel) senderFor(conn Connection) *gossipSender { 128 | return conn.(gossipConnection).gossipSenders().Sender(c.name, c.makeGossipSender) 129 | } 130 | 131 | func (c *gossipChannel) 
makeGossipSender(sender protocolSender, stop <-chan struct{}) *gossipSender { 132 | return newGossipSender(c.makeMsg, c.makeBroadcastMsg, sender, stop) 133 | } 134 | 135 | func (c *gossipChannel) makeMsg(msg []byte) protocolMsg { 136 | return protocolMsg{ProtocolGossip, gobEncode(c.name, c.ourself.Name, msg)} 137 | } 138 | 139 | func (c *gossipChannel) makeBroadcastMsg(srcName PeerName, msg []byte) protocolMsg { 140 | return protocolMsg{ProtocolGossipBroadcast, gobEncode(c.name, srcName, msg)} 141 | } 142 | 143 | func (c *gossipChannel) logf(format string, args ...interface{}) { 144 | format = "[gossip " + c.name + "]: " + format 145 | c.logger.Printf(format, args...) 146 | } 147 | 148 | // GobEncode gob-encodes each item and returns the resulting byte slice. 149 | func gobEncode(items ...interface{}) []byte { 150 | buf := new(bytes.Buffer) 151 | enc := gob.NewEncoder(buf) 152 | for _, i := range items { 153 | if err := enc.Encode(i); err != nil { 154 | panic(err) 155 | } 156 | } 157 | return buf.Bytes() 158 | } 159 | -------------------------------------------------------------------------------- /gossip_test.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | "math" 8 | "sync" 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | // TODO test gossip unicast; atm we only test topology gossip and 15 | // surrogates, neither of which employ unicast. 
16 | 17 | type mockGossipConnection struct { 18 | remoteConnection 19 | dest *Router 20 | senders *gossipSenders 21 | start chan struct{} 22 | } 23 | 24 | var _ gossipConnection = &mockGossipConnection{} 25 | 26 | func newTestRouter(t *testing.T, name string) *Router { 27 | peerName, _ := PeerNameFromString(name) 28 | router, err := NewRouter(Config{}, peerName, "nick", nil, log.New(ioutil.Discard, "", 0)) 29 | require.NoError(t, err) 30 | router.Start() 31 | return router 32 | } 33 | 34 | func (conn *mockGossipConnection) breakTie(dupConn ourConnection) connectionTieBreak { 35 | return tieBreakTied 36 | } 37 | 38 | func (conn *mockGossipConnection) shutdown(err error) { 39 | } 40 | 41 | func (conn *mockGossipConnection) logf(format string, args ...interface{}) { 42 | format = "->[" + conn.remoteTCPAddr + "|" + conn.remote.String() + "]: " + format 43 | if len(format) == 0 || format[len(format)-1] != '\n' { 44 | format += "\n" 45 | } 46 | fmt.Printf(format, args...) 47 | } 48 | 49 | func (conn *mockGossipConnection) SendProtocolMsg(pm protocolMsg) error { 50 | <-conn.start 51 | return conn.dest.handleGossip(pm.tag, pm.msg) 52 | } 53 | 54 | func (conn *mockGossipConnection) gossipSenders() *gossipSenders { 55 | return conn.senders 56 | } 57 | 58 | func (conn *mockGossipConnection) Start() { 59 | close(conn.start) 60 | } 61 | 62 | func sendPendingGossip(routers ...*Router) { 63 | // Loop until all routers report they didn't send anything 64 | for sentSomething := true; sentSomething; { 65 | sentSomething = false 66 | for _, router := range routers { 67 | sentSomething = router.sendPendingGossip() || sentSomething 68 | } 69 | } 70 | } 71 | 72 | func sendPendingTopologyUpdates(routers ...*Router) { 73 | for _, router := range routers { 74 | router.Ourself.Lock() 75 | pendingUpdate := router.Ourself.pendingTopologyUpdate 76 | router.Ourself.Unlock() 77 | if pendingUpdate { 78 | router.Ourself.broadcastPendingTopologyUpdates() 79 | } 80 | } 81 | } 82 | 83 | func 
addTestGossipConnection(t require.TestingT, r1, r2 *Router) { 84 | c1 := r1.newTestGossipConnection(t, r2) 85 | c2 := r2.newTestGossipConnection(t, r1) 86 | c1.Start() 87 | c2.Start() 88 | } 89 | 90 | func (router *Router) newTestGossipConnection(t require.TestingT, r *Router) *mockGossipConnection { 91 | to := r.Ourself.Peer 92 | toPeer := newPeer(to.Name, to.NickName, to.UID, 0, to.ShortID) 93 | toPeer = router.Peers.fetchWithDefault(toPeer) // Has side-effect of incrementing refcount 94 | 95 | conn := &mockGossipConnection{ 96 | remoteConnection: *newRemoteConnection(router.Ourself.Peer, toPeer, "", false, true), 97 | dest: r, 98 | start: make(chan struct{}), 99 | } 100 | conn.senders = newGossipSenders(conn, make(chan struct{})) 101 | require.NoError(t, router.Ourself.handleAddConnection(conn, false)) 102 | router.Ourself.handleConnectionEstablished(conn) 103 | return conn 104 | } 105 | 106 | func (router *Router) DeleteTestGossipConnection(r *Router) { 107 | toName := r.Ourself.Peer.Name 108 | conn, _ := router.Ourself.ConnectionTo(toName) 109 | router.Peers.dereference(conn.Remote()) 110 | router.Ourself.handleDeleteConnection(conn.(ourConnection)) 111 | } 112 | 113 | // Create a Peer representing the receiver router, with connections to 114 | // the routers supplied as arguments, carrying across all UID and 115 | // version information. 
116 | func (router *Router) tp(routers ...*Router) *Peer { 117 | peer := newPeerFrom(router.Ourself.Peer) 118 | connections := make(map[PeerName]Connection) 119 | for _, r := range routers { 120 | p := newPeerFrom(r.Ourself.Peer) 121 | connections[r.Ourself.Peer.Name] = newMockConnection(peer, p) 122 | } 123 | peer.Version = router.Ourself.Peer.Version 124 | peer.connections = connections 125 | return peer 126 | } 127 | 128 | // Check that the topology of router matches the peers and all of their connections 129 | func checkTopology(t *testing.T, router *Router, wantedPeers ...*Peer) { 130 | router.Peers.RLock() 131 | checkTopologyPeers(t, true, router.Peers.allPeers(), wantedPeers...) 132 | router.Peers.RUnlock() 133 | } 134 | 135 | func flushAndCheckTopology(t *testing.T, routers []*Router, wantedPeers ...*Peer) { 136 | sendPendingTopologyUpdates(routers...) 137 | sendPendingGossip(routers...) 138 | for _, r := range routers { 139 | checkTopology(t, r, wantedPeers...) 140 | } 141 | } 142 | 143 | func TestGossipTopology(t *testing.T) { 144 | // Create some peers that will talk to each other 145 | r1 := newTestRouter(t, "01:00:00:01:00:00") 146 | r2 := newTestRouter(t, "02:00:00:02:00:00") 147 | r3 := newTestRouter(t, "03:00:00:03:00:00") 148 | routers := []*Router{r1, r2, r3} 149 | // Check state when they have no connections 150 | checkTopology(t, r1, r1.tp()) 151 | checkTopology(t, r2, r2.tp()) 152 | 153 | // Now try adding some connections 154 | addTestGossipConnection(t, r1, r2) 155 | sendPendingGossip(r1, r2) 156 | checkTopology(t, r1, r1.tp(r2), r2.tp(r1)) 157 | checkTopology(t, r2, r1.tp(r2), r2.tp(r1)) 158 | 159 | addTestGossipConnection(t, r2, r3) 160 | flushAndCheckTopology(t, routers, r1.tp(r2), r2.tp(r1, r3), r3.tp(r2)) 161 | 162 | addTestGossipConnection(t, r3, r1) 163 | flushAndCheckTopology(t, routers, r1.tp(r2, r3), r2.tp(r1, r3), r3.tp(r1, r2)) 164 | 165 | // Drop the connection from 2 to 3 166 | r2.DeleteTestGossipConnection(r3) 167 | 
flushAndCheckTopology(t, routers, r1.tp(r2, r3), r2.tp(r1), r3.tp(r1, r2)) 168 | 169 | // Drop the connection from 1 to 3 170 | r1.DeleteTestGossipConnection(r3) 171 | sendPendingTopologyUpdates(routers...) 172 | sendPendingGossip(r1, r2, r3) 173 | forcePendingGC(r1, r2, r3) 174 | checkTopology(t, r1, r1.tp(r2), r2.tp(r1)) 175 | checkTopology(t, r2, r1.tp(r2), r2.tp(r1)) 176 | // r3 still thinks r1 has a connection to it 177 | checkTopology(t, r3, r1.tp(r2, r3), r2.tp(r1), r3.tp(r1, r2)) 178 | } 179 | 180 | func TestGossipSurrogate(t *testing.T) { 181 | // create the topology r1 <-> r2 <-> r3 182 | r1 := newTestRouter(t, "01:00:00:01:00:00") 183 | r2 := newTestRouter(t, "02:00:00:02:00:00") 184 | r3 := newTestRouter(t, "03:00:00:03:00:00") 185 | routers := []*Router{r1, r2, r3} 186 | addTestGossipConnection(t, r1, r2) 187 | addTestGossipConnection(t, r3, r2) 188 | flushAndCheckTopology(t, routers, r1.tp(r2), r2.tp(r1, r3), r3.tp(r2)) 189 | 190 | // create a gossiper at either end, but not the middle 191 | g1 := newTestGossiper() 192 | g3 := newTestGossiper() 193 | s1, err := r1.NewGossip("Test", g1) 194 | require.NoError(t, err) 195 | s3, err := r3.NewGossip("Test", g3) 196 | require.NoError(t, err) 197 | 198 | // broadcast a message from each end, check it reaches the other 199 | broadcast(s1, 1) 200 | broadcast(s3, 2) 201 | sendPendingGossip(r1, r2, r3) 202 | g1.checkHas(t, 2) 203 | g3.checkHas(t, 1) 204 | 205 | // check that each end gets their message back through periodic 206 | // gossip 207 | r1.sendAllGossip() 208 | r3.sendAllGossip() 209 | sendPendingGossip(r1, r2, r3) 210 | g1.checkHas(t, 1, 2) 211 | g3.checkHas(t, 1, 2) 212 | } 213 | 214 | type testGossiper struct { 215 | sync.RWMutex 216 | state map[byte]struct{} 217 | } 218 | 219 | func newTestGossiper() *testGossiper { 220 | return &testGossiper{state: make(map[byte]struct{})} 221 | } 222 | 223 | func (g *testGossiper) OnGossipUnicast(sender PeerName, msg []byte) error { 224 | return nil 225 | } 226 | 
227 | func (g *testGossiper) OnGossipBroadcast(_ PeerName, update []byte) (GossipData, error) { 228 | g.Lock() 229 | defer g.Unlock() 230 | for _, v := range update { 231 | g.state[v] = struct{}{} 232 | } 233 | return newSurrogateGossipData(update), nil 234 | } 235 | 236 | func (g *testGossiper) Gossip() GossipData { 237 | g.RLock() 238 | defer g.RUnlock() 239 | state := make([]byte, 0, len(g.state)) // zero length, so append does not follow len(g.state) spurious zero bytes 240 | for v := range g.state { 241 | state = append(state, v) 242 | } 243 | return newSurrogateGossipData(state) 244 | } 245 | 246 | func (g *testGossiper) OnGossip(update []byte) (GossipData, error) { 247 | g.Lock() 248 | defer g.Unlock() 249 | var delta []byte 250 | for _, v := range update { 251 | if _, found := g.state[v]; !found { 252 | delta = append(delta, v) 253 | g.state[v] = struct{}{} 254 | } 255 | } 256 | if len(delta) == 0 { 257 | return nil, nil 258 | } 259 | return newSurrogateGossipData(delta), nil 260 | } 261 | 262 | func (g *testGossiper) checkHas(t *testing.T, vs ...byte) { 263 | g.RLock() 264 | defer g.RUnlock() 265 | for _, v := range vs { 266 | if _, found := g.state[v]; !found { 267 | require.FailNow(t, fmt.Sprintf("%d is missing", v)) 268 | } 269 | } 270 | } 271 | 272 | func broadcast(s Gossip, v byte) { 273 | s.GossipBroadcast(newSurrogateGossipData([]byte{v})) 274 | } 275 | 276 | func TestRandomNeighbours(t *testing.T) { 277 | const nTrials = 5000 278 | ourself := PeerName(0) // aliased with UnknownPeerName, which is ok here 279 | // Check fairness of selection across different-sized sets 280 | for _, test := range []struct{ nPeers, nNeighbours int }{{1, 0}, {2, 1}, {3, 2}, {10, 2}, {10, 3}, {10, 9}, {100, 2}, {100, 99}} { 281 | t.Run(fmt.Sprint(test.nPeers, "_peers_", test.nNeighbours, "_neighbours"), func(t *testing.T) { 282 | // Create a test fixture with unicastAll set up 283 | r := routes{ 284 | unicastAll: make(unicastRoutes, test.nPeers), 285 | } 286 | // The route to 'ourself' is always via 'unknown' 287 | r.unicastAll[ourself] = 
UnknownPeerName 288 | // Fully-connected: unicast route to X is via X 289 | for i := 1; i < test.nPeers; i++ { 290 | r.unicastAll[PeerName(i)] = PeerName(i%test.nNeighbours + 1) 291 | } 292 | total := 0 293 | counts := make([]int, test.nNeighbours+1) 294 | // Run randomNeighbours() several times, and count the distribution 295 | for trial := 0; trial < nTrials; trial++ { 296 | targets := r.randomNeighbours(ourself) 297 | expected := int(math.Min(2*math.Log2(float64(test.nPeers)), float64(test.nNeighbours))) 298 | require.Equal(t, expected, len(targets)) 299 | total += len(targets) 300 | for _, p := range targets { 301 | counts[p]++ 302 | } 303 | } 304 | require.Equal(t, 0, counts[ourself], "randomNeighbours should not select source peer") 305 | // Check that each neighbour was picked within 20% of an average count 306 | for i := 1; i < test.nNeighbours+1; i++ { 307 | require.InEpsilon(t, float64(total)/float64(test.nNeighbours), counts[i], 0.2, "peer %d picked %d times out of %d; counts %v", i, counts[i], total, counts) 308 | } 309 | }) 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /lint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | if [ ! 
$(command -v golangci-lint) ] 8 | then 9 | curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b $(go env GOPATH)/bin v1.20.0 10 | golangci-lint --version 11 | fi 12 | 13 | golangci-lint run 14 | -------------------------------------------------------------------------------- /local_peer.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "encoding/gob" 5 | "fmt" 6 | "net" 7 | "sync" 8 | "time" 9 | ) 10 | 11 | const ( 12 | deferTopologyUpdateDuration = 1 * time.Second 13 | ) 14 | 15 | // localPeer is the only "active" peer in the mesh. It extends Peer with 16 | // additional behaviors, mostly to retrieve and manage connection state. 17 | type localPeer struct { 18 | sync.RWMutex 19 | *Peer 20 | router *Router 21 | actionChan chan<- localPeerAction 22 | topologyUpdates peerNameSet 23 | timer *time.Timer 24 | pendingTopologyUpdate bool 25 | } 26 | 27 | // The actor closure used by localPeer. 28 | type localPeerAction func() 29 | 30 | // newLocalPeer returns a usable localPeer. 31 | func newLocalPeer(name PeerName, nickName string, router *Router) *localPeer { 32 | actionChan := make(chan localPeerAction, ChannelSize) 33 | topologyUpdates := make(peerNameSet) 34 | peer := &localPeer{ 35 | Peer: newPeer(name, nickName, randomPeerUID(), 0, randomPeerShortID()), 36 | router: router, 37 | actionChan: actionChan, 38 | topologyUpdates: topologyUpdates, 39 | timer: time.NewTimer(deferTopologyUpdateDuration), 40 | } 41 | peer.timer.Stop() 42 | go peer.actorLoop(actionChan) 43 | return peer 44 | } 45 | 46 | // getConnections returns all the connections that the local peer is aware of.
47 | func (peer *localPeer) getConnections() connectionSet { 48 | connections := make(connectionSet) 49 | peer.RLock() 50 | defer peer.RUnlock() 51 | for _, conn := range peer.connections { 52 | connections[conn] = struct{}{} 53 | } 54 | return connections 55 | } 56 | 57 | // ConnectionTo returns the connection to the named peer, if any. 58 | // 59 | // TODO(pb): Weave Net invokes router.Ourself.ConnectionTo; 60 | // it may be better to provide that on Router directly. 61 | func (peer *localPeer) ConnectionTo(name PeerName) (Connection, bool) { 62 | peer.RLock() 63 | defer peer.RUnlock() 64 | conn, found := peer.connections[name] 65 | return conn, found // yes, you really can't inline that. FFS. 66 | } 67 | 68 | // ConnectionsTo returns all known connections to the named peers. 69 | // 70 | // TODO(pb): Weave Net invokes router.Ourself.ConnectionsTo; 71 | // it may be better to provide that on Router directly. 72 | func (peer *localPeer) ConnectionsTo(names []PeerName) []Connection { 73 | if len(names) == 0 { 74 | return nil 75 | } 76 | conns := make([]Connection, 0, len(names)) 77 | peer.RLock() 78 | defer peer.RUnlock() 79 | for _, name := range names { 80 | conn, found := peer.connections[name] 81 | // Again, !found could just be due to a race. 82 | if found { 83 | conns = append(conns, conn) 84 | } 85 | } 86 | return conns 87 | } 88 | 89 | // createConnection creates a new connection, originating from 90 | // localAddr, to peerAddr. If acceptNewPeer is false, peerAddr must 91 | // already be a member of the mesh. 
92 | func (peer *localPeer) createConnection(localAddr string, peerAddr string, acceptNewPeer bool, logger Logger) error { 93 | if err := peer.checkConnectionLimit(); err != nil { 94 | return err 95 | } 96 | localTCPAddr, err := net.ResolveTCPAddr("tcp", localAddr) 97 | if err != nil { 98 | return err 99 | } 100 | remoteTCPAddr, err := net.ResolveTCPAddr("tcp", peerAddr) 101 | if err != nil { 102 | return err 103 | } 104 | tcpConn, err := net.DialTCP("tcp", localTCPAddr, remoteTCPAddr) 105 | if err != nil { 106 | return err 107 | } 108 | connRemote := newRemoteConnection(peer.Peer, nil, peerAddr, true, false) 109 | startLocalConnection(connRemote, tcpConn, peer.router, acceptNewPeer, logger) 110 | return nil 111 | } 112 | 113 | // ACTOR client API 114 | 115 | // Synchronous. 116 | func (peer *localPeer) doAddConnection(conn ourConnection, isRestartedPeer bool) error { 117 | resultChan := make(chan error) 118 | peer.actionChan <- func() { 119 | resultChan <- peer.handleAddConnection(conn, isRestartedPeer) 120 | } 121 | return <-resultChan 122 | } 123 | 124 | // Asynchronous. 125 | func (peer *localPeer) doConnectionEstablished(conn ourConnection) { 126 | peer.actionChan <- func() { 127 | peer.handleConnectionEstablished(conn) 128 | } 129 | } 130 | 131 | // Synchronous. 
132 | func (peer *localPeer) doDeleteConnection(conn ourConnection) { 133 | resultChan := make(chan interface{}) 134 | peer.actionChan <- func() { 135 | peer.handleDeleteConnection(conn) 136 | resultChan <- nil 137 | } 138 | <-resultChan 139 | } 140 | 141 | func (peer *localPeer) encode(enc *gob.Encoder) { 142 | peer.RLock() 143 | defer peer.RUnlock() 144 | peer.Peer.encode(enc) 145 | } 146 | 147 | // ACTOR server 148 | 149 | func (peer *localPeer) actorLoop(actionChan <-chan localPeerAction) { 150 | gossipInterval := defaultGossipInterval 151 | if peer.router != nil { 152 | gossipInterval = peer.router.gossipInterval() 153 | } 154 | gossipTimer := time.Tick(gossipInterval) 155 | for { 156 | select { 157 | case action := <-actionChan: 158 | action() 159 | case <-gossipTimer: 160 | peer.router.sendAllGossip() 161 | case <-peer.timer.C: 162 | peer.broadcastPendingTopologyUpdates() 163 | } 164 | } 165 | } 166 | 167 | func (peer *localPeer) broadcastPendingTopologyUpdates() { 168 | peer.Lock() 169 | gossipData := peer.topologyUpdates 170 | peer.topologyUpdates = make(peerNameSet) 171 | peer.pendingTopologyUpdate = false 172 | peer.Unlock() 173 | gossipData[peer.Peer.Name] = struct{}{} 174 | peer.router.broadcastTopologyUpdate(gossipData) 175 | } 176 | 177 | func (peer *localPeer) handleAddConnection(conn ourConnection, isRestartedPeer bool) error { 178 | if peer.Peer != conn.getLocal() { 179 | panic("Attempt made to add connection to peer where peer is not the source of connection") 180 | } 181 | if conn.Remote() == nil { 182 | panic("Attempt made to add connection to peer with unknown remote peer") 183 | } 184 | toName := conn.Remote().Name 185 | dupErr := fmt.Errorf("Multiple connections to %s added to %s", conn.Remote(), peer.String()) 186 | // deliberately non symmetrical 187 | if dupConn, found := peer.connections[toName]; found { 188 | if dupConn == conn { 189 | return nil 190 | } 191 | dupOurConn := dupConn.(ourConnection) 192 | switch conn.breakTie(dupOurConn) 
{ 193 | case tieBreakWon: 194 | dupOurConn.shutdown(dupErr) 195 | peer.handleDeleteConnection(dupOurConn) 196 | case tieBreakLost: 197 | return dupErr 198 | case tieBreakTied: 199 | // oh good grief. Sod it, just kill both of them. 200 | dupOurConn.shutdown(dupErr) 201 | peer.handleDeleteConnection(dupOurConn) 202 | return dupErr 203 | } 204 | } 205 | if err := peer.checkConnectionLimit(); err != nil { 206 | return err 207 | } 208 | _, isConnectedPeer := peer.router.Routes.Unicast(toName) 209 | peer.addConnection(conn) 210 | switch { 211 | case isRestartedPeer: 212 | conn.logf("connection added (restarted peer)") 213 | peer.router.sendAllGossipDown(conn) 214 | case isConnectedPeer: 215 | conn.logf("connection added") 216 | default: 217 | conn.logf("connection added (new peer)") 218 | peer.router.sendAllGossipDown(conn) 219 | } 220 | peer.router.Routes.recalculate() 221 | peer.broadcastPeerUpdate(conn.Remote()) 222 | 223 | return nil 224 | } 225 | 226 | func (peer *localPeer) handleConnectionEstablished(conn ourConnection) { 227 | if peer.Peer != conn.getLocal() { 228 | panic("Peer informed of active connection where peer is not the source of connection") 229 | } 230 | if dupConn, found := peer.connections[conn.Remote().Name]; !found || conn != dupConn { 231 | conn.shutdown(fmt.Errorf("Cannot set unknown connection active")) 232 | return 233 | } 234 | peer.connectionEstablished(conn) 235 | conn.logf("connection fully established") 236 | 237 | peer.router.Routes.recalculate() 238 | peer.broadcastPeerUpdate() 239 | } 240 | 241 | func (peer *localPeer) handleDeleteConnection(conn ourConnection) { 242 | if peer.Peer != conn.getLocal() { 243 | panic("Attempt made to delete connection from peer where peer is not the source of connection") 244 | } 245 | if conn.Remote() == nil { 246 | panic("Attempt made to delete connection to peer with unknown remote peer") 247 | } 248 | toName := conn.Remote().Name 249 | if connFound, found := peer.connections[toName]; !found || 
connFound != conn { 250 | return 251 | } 252 | peer.deleteConnection(conn) 253 | conn.logf("connection deleted") 254 | // Must do garbage collection first to ensure we don't send out an 255 | // update with unreachable peers (can cause looping) 256 | peer.router.Peers.GarbageCollect() 257 | peer.router.Routes.recalculate() 258 | peer.broadcastPeerUpdate() 259 | } 260 | 261 | // helpers 262 | 263 | func (peer *localPeer) broadcastPeerUpdate(peers ...*Peer) { 264 | // Some tests run without a router. This should be fixed so 265 | // that the relevant part of Router can be easily run in the 266 | // context of a test, but that will involve significant 267 | // reworking of tests. 268 | if peer.router != nil { 269 | peer.Lock() 270 | defer peer.Unlock() 271 | if !peer.pendingTopologyUpdate { 272 | peer.timer.Reset(deferTopologyUpdateDuration) 273 | peer.pendingTopologyUpdate = true 274 | } 275 | for _, p := range peers { 276 | peer.topologyUpdates[p.Name] = struct{}{} 277 | } 278 | } 279 | } 280 | 281 | func (peer *localPeer) checkConnectionLimit() error { 282 | limit := peer.router.ConnLimit 283 | if 0 != limit && peer.connectionCount() >= limit { 284 | return fmt.Errorf("Connection limit reached (%v)", limit) 285 | } 286 | return nil 287 | } 288 | 289 | func (peer *localPeer) addConnection(conn Connection) { 290 | peer.Lock() 291 | defer peer.Unlock() 292 | peer.connections[conn.Remote().Name] = conn 293 | peer.Version++ 294 | } 295 | 296 | func (peer *localPeer) deleteConnection(conn Connection) { 297 | peer.Lock() 298 | defer peer.Unlock() 299 | delete(peer.connections, conn.Remote().Name) 300 | peer.Version++ 301 | } 302 | 303 | func (peer *localPeer) connectionEstablished(conn Connection) { 304 | peer.Lock() 305 | defer peer.Unlock() 306 | peer.Version++ 307 | } 308 | 309 | func (peer *localPeer) connectionCount() int { 310 | peer.RLock() 311 | defer peer.RUnlock() 312 | return len(peer.connections) 313 | } 314 | 315 | func (peer *localPeer) setShortID(shortID 
PeerShortID) { 316 | peer.Lock() 317 | defer peer.Unlock() 318 | peer.ShortID = shortID 319 | peer.Version++ 320 | } 321 | 322 | func (peer *localPeer) setVersionBeyond(version uint64) bool { 323 | peer.Lock() 324 | defer peer.Unlock() 325 | if version >= peer.Version { 326 | peer.Version = version + 1 327 | return true 328 | } 329 | return false 330 | } 331 | -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | // Logger is a simple interface used by mesh to do logging. 4 | type Logger interface { 5 | Printf(format string, args ...interface{}) 6 | } 7 | -------------------------------------------------------------------------------- /meshconn/README.md: -------------------------------------------------------------------------------- 1 | # meshconn 2 | 3 | meshconn implements [net.PacketConn](https://golang.org/pkg/net/#PacketConn) on top of mesh. 4 | Think of it as UDP with benefits: 5 | NAT and bastion host (DMZ) traversal, 6 | broadcast/multicast in networks where this is normally not possible e.g. EC2, 7 | and an up-to-date, queryable memberlist. 8 | 9 | meshconn supports [net.Addr](https://golang.org/pkg/net/#Addr) of the form `weavemesh://<PeerName>`. 10 | By default, `<PeerName>` is a hardware address of the form `01:02:03:FD:FE:FF`. 11 | Other forms of PeerName e.g. hashes are supported. 12 | 13 | meshconn itself is largely stateless and has best-effort delivery semantics. 14 | As a future experiment, it could easily be amended to have basic resiliency guarantees. 15 | Also, at the moment, PacketConn read and write deadlines are not supported.
16 | -------------------------------------------------------------------------------- /meshconn/mesh_addr.go: -------------------------------------------------------------------------------- 1 | package meshconn 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | "github.com/weaveworks/mesh" 8 | ) 9 | 10 | // MeshAddr implements net.Addr for mesh peers. 11 | type MeshAddr struct { 12 | mesh.PeerName // stable across invocations 13 | mesh.PeerUID // new with each invocation 14 | } 15 | 16 | var _ net.Addr = MeshAddr{} 17 | 18 | // Network returns weavemesh. 19 | func (a MeshAddr) Network() string { return "weavemesh" } 20 | 21 | // String returns weavemesh://<PeerName>. 22 | func (a MeshAddr) String() string { return fmt.Sprintf("%s://%s", a.Network(), a.PeerName.String()) } 23 | -------------------------------------------------------------------------------- /meshconn/peer.go: -------------------------------------------------------------------------------- 1 | package meshconn 2 | 3 | import ( 4 | "errors" 5 | "net" 6 | "time" 7 | 8 | "github.com/weaveworks/mesh" 9 | ) 10 | 11 | var ( 12 | // ErrShortRead is returned by ReadFrom when the 13 | // passed buffer is too small for the packet. 14 | ErrShortRead = errors.New("short read") 15 | 16 | // ErrPeerClosed is returned by ReadFrom and WriteTo 17 | // when the peer is closed during the operation. 18 | ErrPeerClosed = errors.New("peer closed") 19 | 20 | // ErrGossipNotRegistered is returned by WriteTo when attempting 21 | // to write before a mesh.Gossip has been registered in the peer. 22 | ErrGossipNotRegistered = errors.New("gossip not registered") 23 | 24 | // ErrNotMeshAddr is returned by WriteTo when attempting 25 | // to write to a non-mesh address. 26 | ErrNotMeshAddr = errors.New("not a mesh addr") 27 | 28 | // ErrNotSupported is returned by methods that are not supported. 29 | ErrNotSupported = errors.New("not supported") 30 | ) 31 | 32 | // Peer implements mesh.Gossiper and net.PacketConn.
33 | type Peer struct { 34 | name mesh.PeerName 35 | uid mesh.PeerUID 36 | gossip mesh.Gossip 37 | recv chan pkt 38 | actions chan func() 39 | quit chan struct{} 40 | logger mesh.Logger 41 | } 42 | 43 | // NewPeer returns a Peer, which can be used as a net.PacketConn. 44 | // Clients must Register a mesh.Gossip before calling ReadFrom or WriteTo. 45 | // Clients should aggressively consume from ReadFrom. 46 | func NewPeer(name mesh.PeerName, uid mesh.PeerUID, logger mesh.Logger) *Peer { 47 | p := &Peer{ 48 | name: name, 49 | uid: uid, 50 | gossip: nil, // initially no gossip 51 | recv: make(chan pkt), 52 | actions: make(chan func()), 53 | quit: make(chan struct{}), 54 | logger: logger, 55 | } 56 | go p.loop() 57 | return p 58 | } 59 | 60 | func (p *Peer) loop() { 61 | for { 62 | select { 63 | case f := <-p.actions: 64 | f() 65 | case <-p.quit: 66 | return 67 | } 68 | } 69 | } 70 | 71 | // Register injects the mesh.Gossip and enables full-duplex communication. 72 | // Clients should consume from ReadFrom without blocking. 73 | func (p *Peer) Register(gossip mesh.Gossip) { 74 | p.actions <- func() { p.gossip = gossip } 75 | } 76 | 77 | // ReadFrom implements net.PacketConn. 78 | // Clients should consume from ReadFrom without blocking. 79 | func (p *Peer) ReadFrom(b []byte) (n int, remote net.Addr, err error) { 80 | c := make(chan struct{}) 81 | p.actions <- func() { 82 | go func() { // so as not to block loop 83 | defer close(c) 84 | select { 85 | case pkt := <-p.recv: 86 | n = copy(b, pkt.Buf) 87 | remote = MeshAddr{PeerName: pkt.SrcName, PeerUID: pkt.SrcUID} 88 | if n < len(pkt.Buf) { 89 | err = ErrShortRead 90 | } 91 | case <-p.quit: 92 | err = ErrPeerClosed 93 | } 94 | }() 95 | } 96 | <-c 97 | return n, remote, err 98 | } 99 | 100 | // WriteTo implements net.PacketConn. 
101 | func (p *Peer) WriteTo(b []byte, dst net.Addr) (n int, err error) { 102 | c := make(chan struct{}) 103 | p.actions <- func() { 104 | defer close(c) 105 | if p.gossip == nil { 106 | err = ErrGossipNotRegistered 107 | return 108 | } 109 | meshAddr, ok := dst.(MeshAddr) 110 | if !ok { 111 | err = ErrNotMeshAddr 112 | return 113 | } 114 | pkt := pkt{SrcName: p.name, SrcUID: p.uid, Buf: b} 115 | if meshAddr.PeerName == p.name { 116 | p.recv <- pkt 117 | return 118 | } 119 | // TODO(pb): detect and support broadcast 120 | buf := pkt.encode() 121 | n = len(buf) 122 | err = p.gossip.GossipUnicast(meshAddr.PeerName, buf) 123 | } 124 | <-c 125 | return n, err 126 | } 127 | 128 | // Close implements net.PacketConn. 129 | func (p *Peer) Close() error { 130 | close(p.quit) 131 | return nil 132 | } 133 | 134 | // LocalAddr implements net.PacketConn. 135 | func (p *Peer) LocalAddr() net.Addr { 136 | return MeshAddr{PeerName: p.name, PeerUID: p.uid} 137 | } 138 | 139 | // SetDeadline implements net.PacketConn. 140 | // SetDeadline is not supported. 141 | func (p *Peer) SetDeadline(time.Time) error { 142 | return ErrNotSupported 143 | } 144 | 145 | // SetReadDeadline implements net.PacketConn. 146 | // SetReadDeadline is not supported. 147 | func (p *Peer) SetReadDeadline(time.Time) error { 148 | return ErrNotSupported 149 | } 150 | 151 | // SetWriteDeadline implements net.PacketConn. 152 | // SetWriteDeadline is not supported. 153 | func (p *Peer) SetWriteDeadline(time.Time) error { 154 | return ErrNotSupported 155 | } 156 | 157 | // Gossip implements mesh.Gossiper. 158 | func (p *Peer) Gossip() (complete mesh.GossipData) { 159 | return pktSlice{} // we're stateless 160 | } 161 | 162 | // OnGossip implements mesh.Gossiper. 163 | // The buf is a single pkt. 164 | func (p *Peer) OnGossip(buf []byte) (delta mesh.GossipData, err error) { 165 | return pktSlice{makePkt(buf)}, nil 166 | } 167 | 168 | // OnGossipBroadcast implements mesh.Gossiper. 
169 | // The buf is a single pkt 170 | func (p *Peer) OnGossipBroadcast(_ mesh.PeerName, buf []byte) (received mesh.GossipData, err error) { 171 | pkt := makePkt(buf) 172 | p.recv <- pkt // to ReadFrom 173 | return pktSlice{pkt}, nil 174 | } 175 | 176 | // OnGossipUnicast implements mesh.Gossiper. 177 | // The buf is a single pkt. 178 | func (p *Peer) OnGossipUnicast(_ mesh.PeerName, buf []byte) error { 179 | pkt := makePkt(buf) 180 | p.recv <- pkt // to ReadFrom 181 | return nil 182 | } 183 | -------------------------------------------------------------------------------- /meshconn/pkt.go: -------------------------------------------------------------------------------- 1 | package meshconn 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | 7 | "github.com/weaveworks/mesh" 8 | ) 9 | 10 | type pkt struct { 11 | SrcName mesh.PeerName 12 | SrcUID mesh.PeerUID 13 | Buf []byte 14 | } 15 | 16 | func makePkt(buf []byte) pkt { 17 | var p pkt 18 | if err := gob.NewDecoder(bytes.NewBuffer(buf)).Decode(&p); err != nil { 19 | panic(err) 20 | } 21 | return p 22 | } 23 | 24 | func (p pkt) encode() []byte { 25 | var buf bytes.Buffer 26 | if err := gob.NewEncoder(&buf).Encode(p); err != nil { 27 | panic(err) 28 | } 29 | return buf.Bytes() 30 | } 31 | 32 | // pktSlice implements mesh.GossipData. 33 | type pktSlice []pkt 34 | 35 | var _ mesh.GossipData = &pktSlice{} 36 | 37 | func (s pktSlice) Encode() [][]byte { 38 | bufs := make([][]byte, len(s)) 39 | for i, pkt := range s { 40 | bufs[i] = pkt.encode() 41 | } 42 | return bufs 43 | } 44 | 45 | func (s pktSlice) Merge(other mesh.GossipData) mesh.GossipData { 46 | o := other.(pktSlice) 47 | merged := make(pktSlice, 0, len(s)+len(o)) 48 | merged = append(merged, s...) 49 | merged = append(merged, o...) 
50 | return merged 51 | } 52 | -------------------------------------------------------------------------------- /mocks_test.go: -------------------------------------------------------------------------------- 1 | // No mocks are tested by this file. 2 | // 3 | // It supplies some mock implementations to other unit tests, and is 4 | // named "...test.go" so it is only compiled under `go test`. 5 | 6 | package mesh 7 | 8 | import ( 9 | "fmt" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | // Add to peers a connection from peers.ourself to p 16 | func (peers *Peers) AddTestConnection(p *Peer) { 17 | summary := p.peerSummary 18 | summary.Version = 0 19 | toPeer := newPeerFromSummary(summary) 20 | toPeer = peers.fetchWithDefault(toPeer) // Has side-effect of incrementing refcount 21 | conn := newMockConnection(peers.ourself.Peer, toPeer) 22 | peers.ourself.addConnection(conn) 23 | peers.ourself.connectionEstablished(conn) 24 | } 25 | 26 | // Add to peers a connection from p1 to p2 27 | func (peers *Peers) AddTestRemoteConnection(p1, p2 *Peer) { 28 | fromPeer := newPeerFrom(p1) 29 | fromPeer = peers.fetchWithDefault(fromPeer) 30 | toPeer := newPeerFrom(p2) 31 | toPeer = peers.fetchWithDefault(toPeer) 32 | peers.ourself.addConnection(newRemoteConnection(fromPeer, toPeer, "", false, false)) 33 | } 34 | 35 | func (peers *Peers) DeleteTestConnection(p *Peer) { 36 | toName := p.Name 37 | toPeer := peers.Fetch(toName) 38 | peers.dereference(toPeer) 39 | conn, _ := peers.ourself.ConnectionTo(toName) 40 | peers.ourself.deleteConnection(conn) 41 | } 42 | 43 | // mockConnection used in testing is very similar to a 44 | // RemoteConnection, without the RemoteTCPAddr(). We are making it a 45 | // separate type in order to distinguish what is created by the test 46 | // from what is created by the real code. 
47 | func newMockConnection(from, to *Peer) Connection { 48 | type mockConnection struct{ *remoteConnection } 49 | return &mockConnection{newRemoteConnection(from, to, "", false, false)} 50 | } 51 | 52 | func checkEqualConns(t *testing.T, ourName PeerName, got, wanted map[PeerName]Connection) { 53 | checkConns := make(peerNameSet) 54 | for _, conn := range wanted { 55 | checkConns[conn.Remote().Name] = struct{}{} 56 | } 57 | for _, conn := range got { 58 | remoteName := conn.Remote().Name 59 | if _, found := checkConns[remoteName]; found { 60 | delete(checkConns, remoteName) 61 | } else { 62 | require.FailNow(t, fmt.Sprintf("Unexpected connection from %s to %s", ourName, remoteName)) 63 | } 64 | } 65 | if len(checkConns) > 0 { 66 | require.FailNow(t, fmt.Sprintf("Expected connections not found: from %s to %v", ourName, checkConns)) 67 | } 68 | } 69 | 70 | // Get all the peers from a Peers in a slice 71 | func (peers *Peers) allPeers() []*Peer { 72 | var res []*Peer 73 | for _, peer := range peers.byName { 74 | res = append(res, peer) 75 | } 76 | return res 77 | } 78 | 79 | func (peers *Peers) allPeersExcept(excludeName PeerName) []*Peer { 80 | res := peers.allPeers() 81 | for i, peer := range res { 82 | if peer.Name == excludeName { 83 | return append(res[:i], res[i+1:]...) 84 | } 85 | } 86 | return res 87 | } 88 | 89 | // Check that the peers slice matches the wanted peers 90 | func checkPeerArray(t *testing.T, peers []*Peer, wantedPeers ...*Peer) { 91 | checkTopologyPeers(t, false, peers, wantedPeers...) 
92 | } 93 | 94 | // Check that the peers slice matches the wanted peers and optionally 95 | // all of their connections 96 | func checkTopologyPeers(t *testing.T, checkConns bool, peers []*Peer, wantedPeers ...*Peer) { 97 | check := make(map[PeerName]*Peer) 98 | for _, peer := range wantedPeers { 99 | check[peer.Name] = peer 100 | } 101 | for _, peer := range peers { 102 | name := peer.Name 103 | if wantedPeer, found := check[name]; found { 104 | if checkConns { 105 | checkEqualConns(t, name, peer.connections, wantedPeer.connections) 106 | } 107 | delete(check, name) 108 | } else { 109 | require.FailNow(t, fmt.Sprintf("Unexpected peer: %s", name)) 110 | } 111 | } 112 | if len(check) > 0 { 113 | require.FailNow(t, fmt.Sprintf("Expected peers not found: %v", check)) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /overlay.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "net" 5 | ) 6 | 7 | // Overlay yields OverlayConnections. 8 | type Overlay interface { 9 | // Enhance a features map with overlay-related features. 10 | AddFeaturesTo(map[string]string) 11 | 12 | // Prepare on overlay connection. The connection should remain 13 | // passive until it has been Confirm()ed. 14 | PrepareConnection(OverlayConnectionParams) (OverlayConnection, error) 15 | 16 | // Obtain diagnostic information specific to the overlay. 17 | Diagnostics() interface{} 18 | 19 | // Stop the overlay. 20 | Stop() 21 | } 22 | 23 | // OverlayConnectionParams are used to set up overlay connections. 24 | type OverlayConnectionParams struct { 25 | RemotePeer *Peer 26 | 27 | // The local address of the corresponding TCP connection. Used to 28 | // derive the local IP address for sending. May differ for 29 | // different overlay connections. 30 | LocalAddr *net.TCPAddr 31 | 32 | // The remote address of the corresponding TCP connection. 
Used to 33 | // determine the address to send to, but only if the TCP 34 | // connection is outbound. Otherwise the Overlay needs to discover 35 | // it (e.g. from incoming datagrams). 36 | RemoteAddr *net.TCPAddr 37 | 38 | // Is the corresponding TCP connection outbound? 39 | Outbound bool 40 | 41 | // Unique identifier for this connection 42 | ConnUID uint64 43 | 44 | // Session key, if connection is encrypted; nil otherwise. 45 | // 46 | // NB: overlay connections must take care not to use nonces which 47 | // may collide with those of the main connection. These nonces are 48 | // 192 bits, with the top most bit unspecified, the next bit set 49 | // to 1, followed by 126 zero bits, and a message sequence number 50 | // in the lowest 64 bits. 51 | SessionKey *[32]byte 52 | 53 | // Function to send a control message to the counterpart 54 | // overlay connection. 55 | SendControlMessage func(tag byte, msg []byte) error 56 | 57 | // Features passed at connection initiation 58 | Features map[string]string 59 | } 60 | 61 | // OverlayConnection describes all of the machinery to manage overlay 62 | // connectivity to a particular peer. 63 | type OverlayConnection interface { 64 | // Confirm that the connection is really wanted, and so the 65 | // Overlay should begin heartbeats etc. to verify the operation of 66 | // the overlay connection. 67 | Confirm() 68 | 69 | // EstablishedChannel returns a channel that will be closed when the 70 | // overlay connection is established, i.e. its operation has been 71 | // confirmed. 72 | EstablishedChannel() <-chan struct{} 73 | 74 | // ErrorChannel returns a channel that forwards errors from the overlay 75 | // connection. The overlay connection is not expected to be operational 76 | // after the first error, so the channel only needs to buffer a single 77 | // error. 78 | ErrorChannel() <-chan error 79 | 80 | // Stop terminates the connection. 81 | Stop() 82 | 83 | // ControlMessage handles a message from the remote peer. 
'tag' exists for 84 | // compatibility, and should always be ProtocolOverlayControlMessage for 85 | // non-sleeve overlays. 86 | ControlMessage(tag byte, msg []byte) 87 | 88 | // Attrs returns the user-facing overlay name plus any other 89 | // data that users may wish to check or monitor 90 | Attrs() map[string]interface{} 91 | } 92 | 93 | // NullOverlay implements Overlay and OverlayConnection with no-ops. 94 | type NullOverlay struct{} 95 | 96 | // AddFeaturesTo implements Overlay. 97 | func (NullOverlay) AddFeaturesTo(map[string]string) {} 98 | 99 | // PrepareConnection implements Overlay. 100 | func (NullOverlay) PrepareConnection(OverlayConnectionParams) (OverlayConnection, error) { 101 | return NullOverlay{}, nil 102 | } 103 | 104 | // Diagnostics implements Overlay. 105 | func (NullOverlay) Diagnostics() interface{} { return nil } 106 | 107 | // Confirm implements OverlayConnection. 108 | func (NullOverlay) Confirm() {} 109 | 110 | // EstablishedChannel implements OverlayConnection. 111 | func (NullOverlay) EstablishedChannel() <-chan struct{} { 112 | c := make(chan struct{}) 113 | close(c) 114 | return c 115 | } 116 | 117 | // ErrorChannel implements OverlayConnection. 118 | func (NullOverlay) ErrorChannel() <-chan error { return nil } 119 | 120 | // Stop implements OverlayConnection. 121 | func (NullOverlay) Stop() {} 122 | 123 | // ControlMessage implements OverlayConnection. 124 | func (NullOverlay) ControlMessage(byte, []byte) {} 125 | 126 | // Attrs implements OverlayConnection. 127 | func (NullOverlay) Attrs() map[string]interface{} { return nil } 128 | -------------------------------------------------------------------------------- /peer.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/binary" 6 | "fmt" 7 | "sort" 8 | "strconv" 9 | ) 10 | 11 | // Peer is a local representation of a peer, including connections to other 12 | // peers. 
By itself, it is a remote peer. 13 | type Peer struct { 14 | Name PeerName 15 | peerSummary 16 | localRefCount uint64 // maintained by Peers 17 | connections map[PeerName]Connection 18 | } 19 | 20 | type peerSummary struct { 21 | NameByte []byte 22 | NickName string 23 | UID PeerUID 24 | Version uint64 25 | ShortID PeerShortID 26 | HasShortID bool 27 | } 28 | 29 | // PeerDescription collects information about peers that is useful to clients. 30 | type PeerDescription struct { 31 | Name PeerName 32 | NickName string 33 | UID PeerUID 34 | Self bool 35 | NumConnections int 36 | } 37 | 38 | type connectionSet map[Connection]struct{} 39 | 40 | func newPeerFromSummary(summary peerSummary) *Peer { 41 | return &Peer{ 42 | Name: PeerNameFromBin(summary.NameByte), 43 | peerSummary: summary, 44 | connections: make(map[PeerName]Connection), 45 | } 46 | } 47 | 48 | func newPeer(name PeerName, nickName string, uid PeerUID, version uint64, shortID PeerShortID) *Peer { 49 | return newPeerFromSummary(peerSummary{ 50 | NameByte: name.bytes(), 51 | NickName: nickName, 52 | UID: uid, 53 | Version: version, 54 | ShortID: shortID, 55 | HasShortID: true, 56 | }) 57 | } 58 | 59 | func newPeerPlaceholder(name PeerName) *Peer { 60 | return newPeerFromSummary(peerSummary{NameByte: name.bytes()}) 61 | } 62 | 63 | // String returns the peer name and nickname. 64 | func (peer *Peer) String() string { 65 | return fmt.Sprint(peer.Name, "(", peer.NickName, ")") 66 | } 67 | 68 | // Routes calculates the routing table from this peer to all peers reachable 69 | // from it, returning a "next hop" map of PeerNameX -> PeerNameY, which says 70 | // "in order to send a message to X, the peer should send the message to its 71 | // neighbour Y". 72 | // 73 | // Because currently we do not have weightings on the connections between 74 | // peers, there is no need to use a minimum spanning tree algorithm. Instead 75 | // we employ the simpler and cheaper breadth-first widening. 
The computation 76 | // is deterministic, which ensures that when it is performed on the same data 77 | // by different peers, they get the same result. This is important since 78 | // otherwise we risk message loss or routing cycles. 79 | // 80 | // When the 'establishedAndSymmetric' flag is set, only connections that are 81 | // marked as 'established' and are symmetric (i.e. where both sides indicate 82 | // they have a connection to the other) are considered. 83 | // 84 | // When a non-nil stopAt peer is supplied, the widening stops when it reaches 85 | // that peer. The boolean return indicates whether that has happened. 86 | // 87 | // NB: This function should generally be invoked while holding a read lock on 88 | // Peers and LocalPeer. 89 | func (peer *Peer) routes(stopAt *Peer, establishedAndSymmetric bool) (bool, map[PeerName]PeerName) { 90 | routes := make(unicastRoutes) 91 | routes[peer.Name] = UnknownPeerName 92 | nextWorklist := []*Peer{peer} 93 | for len(nextWorklist) > 0 { 94 | worklist := nextWorklist 95 | sort.Sort(listOfPeers(worklist)) 96 | nextWorklist = []*Peer{} 97 | for _, curPeer := range worklist { 98 | if curPeer == stopAt { 99 | return true, routes 100 | } 101 | curPeer.forEachConnectedPeer(establishedAndSymmetric, routes, 102 | func(remotePeer *Peer) { 103 | nextWorklist = append(nextWorklist, remotePeer) 104 | remoteName := remotePeer.Name 105 | // We now know how to get to remoteName: the same 106 | // way we get to curPeer. Except, if curPeer is 107 | // the starting peer in which case we know we can 108 | // reach remoteName directly. 109 | if curPeer == peer { 110 | routes[remoteName] = remoteName 111 | } else { 112 | routes[remoteName] = routes[curPeer.Name] 113 | } 114 | }) 115 | } 116 | } 117 | return false, routes 118 | } 119 | 120 | // Apply f to all peers reachable by peer. If establishedAndSymmetric is true, 121 | // only peers with established bidirectional connections will be selected. 
The 122 | // exclude maps is treated as a set of remote peers to blacklist. 123 | func (peer *Peer) forEachConnectedPeer(establishedAndSymmetric bool, exclude map[PeerName]PeerName, f func(*Peer)) { 124 | for remoteName, conn := range peer.connections { 125 | if establishedAndSymmetric && !conn.isEstablished() { 126 | continue 127 | } 128 | if _, found := exclude[remoteName]; found { 129 | continue 130 | } 131 | remotePeer := conn.Remote() 132 | if remoteConn, found := remotePeer.connections[peer.Name]; !establishedAndSymmetric || (found && remoteConn.isEstablished()) { 133 | f(remotePeer) 134 | } 135 | } 136 | } 137 | 138 | // PeerUID uniquely identifies a peer in a mesh. 139 | type PeerUID uint64 140 | 141 | // ParsePeerUID parses a decimal peer UID from a string. 142 | func parsePeerUID(s string) (PeerUID, error) { 143 | uid, err := strconv.ParseUint(s, 10, 64) 144 | return PeerUID(uid), err 145 | } 146 | 147 | func randomPeerUID() PeerUID { 148 | for { 149 | uid := randUint64() 150 | if uid != 0 { // uid 0 is reserved for peer placeholder 151 | return PeerUID(uid) 152 | } 153 | } 154 | } 155 | 156 | // PeerShortID exists for the sake of fast datapath. They are 12 bits, 157 | // randomly assigned, but we detect and recover from collisions. This 158 | // does limit us to 4096 peers, but that should be sufficient for a 159 | // while. 160 | type PeerShortID uint16 161 | 162 | const peerShortIDBits = 12 163 | 164 | func randomPeerShortID() PeerShortID { 165 | return PeerShortID(randUint16() & (1<> 1 21 | 22 | // UnknownPeerName is used as a sentinel value. 23 | UnknownPeerName = PeerName("") 24 | ) 25 | 26 | // PeerNameFromUserInput parses PeerName from a user-provided string. 27 | func PeerNameFromUserInput(userInput string) (PeerName, error) { 28 | // fixed-length identity 29 | nameByteAry := sha256.Sum256([]byte(userInput)) 30 | return PeerNameFromBin(nameByteAry[:NameSize]), nil 31 | } 32 | 33 | // PeerNameFromString parses PeerName from a generic string. 
34 | func PeerNameFromString(nameStr string) (PeerName, error) { 35 | if _, err := hex.DecodeString(nameStr); err != nil { 36 | return UnknownPeerName, err 37 | } 38 | return PeerName(nameStr), nil 39 | } 40 | 41 | // PeerNameFromBin parses PeerName from a byte slice. 42 | func PeerNameFromBin(nameByte []byte) PeerName { 43 | return PeerName(hex.EncodeToString(nameByte)) 44 | } 45 | 46 | // bytes encodes PeerName as a byte slice. 47 | func (name PeerName) bytes() []byte { 48 | res, err := hex.DecodeString(string(name)) 49 | if err != nil { 50 | panic("unable to decode name to bytes: " + name) 51 | } 52 | return res 53 | } 54 | 55 | // String encodes PeerName as a string. 56 | func (name PeerName) String() string { 57 | return string(name) 58 | } 59 | -------------------------------------------------------------------------------- /peer_name_hash_test.go: -------------------------------------------------------------------------------- 1 | // +build peer_name_hash 2 | 3 | package mesh_test 4 | 5 | import "testing" 6 | 7 | func TestHashPeerNameFromUserInput(t *testing.T) { 8 | t.Skip("TODO") 9 | } 10 | 11 | func TestHashPeerNameFromString(t *testing.T) { 12 | t.Skip("TODO") 13 | } 14 | 15 | func TestHashPeerNameFromBin(t *testing.T) { 16 | t.Skip("TODO") 17 | } 18 | -------------------------------------------------------------------------------- /peer_name_mac.go: -------------------------------------------------------------------------------- 1 | // +build peer_name_mac !peer_name_alternative 2 | 3 | package mesh 4 | 5 | // The !peer_name_alternative effectively makes this the default, 6 | // i.e. to choose an alternative, run 7 | // 8 | // go build -tags 'peer_name_alternative peer_name_hash' 9 | // 10 | // Let peer names be MACs... 11 | // 12 | // MACs need to be unique across our network, or bad things will 13 | // happen anyway. So they make pretty good candidates for peer 14 | // names. 
And doing so is pretty efficient both computationally and 15 | // network overhead wise. 16 | // 17 | // Note that we do not mandate *what* MAC should be used as the peer 18 | // name. In particular it doesn't actually have to be the MAC of, say, 19 | // the network interface the peer is sniffing on. 20 | 21 | import ( 22 | "fmt" 23 | "net" 24 | ) 25 | 26 | // PeerName is used as a map key. Since net.HardwareAddr isn't suitable for 27 | // that - it's a slice, and slices can't be map keys - we convert that to/from 28 | // uint64. 29 | type PeerName uint64 30 | 31 | const ( 32 | // PeerNameFlavour is the type of peer names we use. 33 | PeerNameFlavour = "mac" 34 | 35 | // NameSize is the number of bytes in a peer name. 36 | NameSize = 6 37 | 38 | // UnknownPeerName is used as a sentinel value. 39 | UnknownPeerName = PeerName(0) 40 | ) 41 | 42 | // PeerNameFromUserInput parses PeerName from a user-provided string. 43 | func PeerNameFromUserInput(userInput string) (PeerName, error) { 44 | return PeerNameFromString(userInput) 45 | } 46 | 47 | // PeerNameFromString parses PeerName from a generic string. 48 | func PeerNameFromString(nameStr string) (PeerName, error) { 49 | var a, b, c, d, e, f uint64 50 | 51 | match := func(format string, args ...interface{}) bool { 52 | a, b, c, d, e, f = 0, 0, 0, 0, 0, 0 53 | n, err := fmt.Sscanf(nameStr+"\000", format+"\000", args...) 
54 | return err == nil && n == len(args) 55 | } 56 | 57 | switch { 58 | case match("%2x:%2x:%2x:%2x:%2x:%2x", &a, &b, &c, &d, &e, &f): 59 | case match("::%2x:%2x:%2x:%2x", &c, &d, &e, &f): 60 | case match("%2x::%2x:%2x:%2x", &a, &d, &e, &f): 61 | case match("%2x:%2x::%2x:%2x", &a, &b, &e, &f): 62 | case match("%2x:%2x:%2x::%2x", &a, &b, &c, &f): 63 | case match("%2x:%2x:%2x:%2x::", &a, &b, &c, &d): 64 | case match("::%2x:%2x:%2x", &d, &e, &f): 65 | case match("%2x::%2x:%2x", &a, &e, &f): 66 | case match("%2x:%2x::%2x", &a, &b, &f): 67 | case match("%2x:%2x:%2x::", &a, &b, &c): 68 | case match("::%2x:%2x", &e, &f): 69 | case match("%2x::%2x", &a, &f): 70 | case match("%2x:%2x::", &a, &b): 71 | case match("::%2x", &f): 72 | case match("%2x::", &a): 73 | default: 74 | return UnknownPeerName, fmt.Errorf("invalid peer name format: %q", nameStr) 75 | } 76 | 77 | return PeerName(a<<40 | b<<32 | c<<24 | d<<16 | e<<8 | f), nil 78 | } 79 | 80 | // PeerNameFromBin parses PeerName from a byte slice. 81 | func PeerNameFromBin(nameByte []byte) PeerName { 82 | return PeerName(macint(net.HardwareAddr(nameByte))) 83 | } 84 | 85 | // bytes encodes PeerName as a byte slice. 86 | func (name PeerName) bytes() []byte { 87 | return intmac(uint64(name)) 88 | } 89 | 90 | // String encodes PeerName as a string. 
// intmac converts the low 48 bits of key into a 6-byte hardware address,
// most significant byte first. Bits above the low 48 are discarded.
func intmac(key uint64) (r net.HardwareAddr) {
	const macLen = 6
	r = make(net.HardwareAddr, macLen)
	for i := range r {
		// Byte i sits (macLen-1-i) bytes above the least significant one.
		shift := uint(8 * (macLen - 1 - i))
		r[i] = byte(key >> shift)
	}
	return r
}
0x123400000000) 41 | checkSuccess(t, "::BC", 0x0000000000BC) 42 | checkSuccess(t, "12::", 0x120000000000) 43 | 44 | // Case insensitivity 45 | checkSuccess(t, "ab:cD:Ef:AB::", 0xABCDEFAB0000) 46 | 47 | // Optional zero padding 48 | checkSuccess(t, "1:2:3:4:5:6", 0x010203040506) 49 | checkSuccess(t, "01:02:03:04:05:06", 0x010203040506) 50 | 51 | // Trailing garbage detection 52 | checkFailure(t, "12::garbage") 53 | 54 | // Octet length 55 | checkFailure(t, "123::") 56 | 57 | // Forbidden elisions 58 | checkFailure(t, "::") 59 | checkFailure(t, "::34:56:78:9A:BC") 60 | checkFailure(t, "12::56:78:9A:BC") 61 | checkFailure(t, "12:34::78:9A:BC") 62 | checkFailure(t, "12:34:56::9A:BC") 63 | checkFailure(t, "12:34:56:78::BC") 64 | checkFailure(t, "12:34:56:78:9A::") 65 | checkFailure(t, "12::78::") 66 | } 67 | 68 | func TestMacPeerNameFromBin(t *testing.T) { 69 | t.Skip("TODO") 70 | } 71 | -------------------------------------------------------------------------------- /peer_test.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import "testing" 4 | 5 | func newPeerFrom(peer *Peer) *Peer { 6 | return newPeerFromSummary(peer.peerSummary) 7 | } 8 | 9 | func TestPeerRoutes(t *testing.T) { 10 | t.Skip("TODO") 11 | } 12 | 13 | func TestPeerForEachConnectedPeer(t *testing.T) { 14 | t.Skip("TODO") 15 | } 16 | 17 | func forcePendingGC(routers ...*Router) { 18 | for _, router := range routers { 19 | router.Peers.Lock() 20 | if router.Peers.pendingGC { 21 | var pending peersPendingNotifications 22 | router.Peers.garbageCollect(&pending) 23 | router.Peers.unlockAndNotify(&pending) 24 | } else { 25 | router.Peers.Unlock() 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /peers_test.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | "time" 8 | 9 | 
"github.com/stretchr/testify/require" 10 | ) 11 | 12 | // TODO we should also test: 13 | // 14 | // - applying an incremental update, including the case where that 15 | // leads to an UnknownPeerError 16 | // 17 | // - the "improved update" calculation 18 | // 19 | // - non-gc of peers that are only referenced locally 20 | 21 | func newNode(name PeerName) (*Peer, *Peers) { 22 | peer := newLocalPeer(name, "", nil) 23 | peers := newPeers(peer) 24 | return peer.Peer, peers 25 | } 26 | 27 | // Check that ApplyUpdate copies the whole topology from peers 28 | func checkApplyUpdate(t *testing.T, peers *Peers) { 29 | dummyName, _ := PeerNameFromString("99:00:00:01:00:00") 30 | // We need a new node outside of the network, with a connection 31 | // into it. 32 | _, testBedPeers := newNode(dummyName) 33 | testBedPeers.AddTestConnection(peers.ourself.Peer) 34 | _, _, err := testBedPeers.applyUpdate(peers.encodePeers(peers.names())) 35 | require.NoError(t, err) 36 | 37 | checkTopologyPeers(t, true, testBedPeers.allPeersExcept(dummyName), peers.allPeers()...) 
38 | } 39 | 40 | func TestPeersEncoding(t *testing.T) { 41 | const numNodes = 20 42 | const numIters = 1000 43 | var peer [numNodes]*Peer 44 | var ps [numNodes]*Peers 45 | for i := 0; i < numNodes; i++ { 46 | name, _ := PeerNameFromString(fmt.Sprintf("%02d:00:00:01:00:00", i)) 47 | peer[i], ps[i] = newNode(name) 48 | } 49 | 50 | var conns []struct{ from, to int } 51 | for i := 0; i < numIters; i++ { 52 | oper := rand.Intn(2) 53 | switch oper { 54 | case 0: 55 | from, to := rand.Intn(numNodes), rand.Intn(numNodes) 56 | if from != to { 57 | if _, found := peer[from].connections[peer[to].Name]; !found { 58 | ps[from].AddTestConnection(peer[to]) 59 | conns = append(conns, struct{ from, to int }{from, to}) 60 | checkApplyUpdate(t, ps[from]) 61 | } 62 | } 63 | case 1: 64 | if len(conns) > 0 { 65 | n := rand.Intn(len(conns)) 66 | c := conns[n] 67 | ps[c.from].DeleteTestConnection(peer[c.to]) 68 | ps[c.from].GarbageCollect() 69 | checkApplyUpdate(t, ps[c.from]) 70 | conns = append(conns[:n], conns[n+1:]...) 
71 | } 72 | } 73 | } 74 | } 75 | 76 | func garbageCollect(peers *Peers) []*Peer { 77 | var removed []*Peer 78 | peers.OnGC(func(peer *Peer) { removed = append(removed, peer) }) 79 | peers.GarbageCollect() 80 | return removed 81 | } 82 | 83 | func TestPeersGarbageCollection(t *testing.T) { 84 | const ( 85 | peer1NameString = "01:00:00:01:00:00" 86 | peer2NameString = "02:00:00:02:00:00" 87 | peer3NameString = "03:00:00:03:00:00" 88 | ) 89 | var ( 90 | peer1Name, _ = PeerNameFromString(peer1NameString) 91 | peer2Name, _ = PeerNameFromString(peer2NameString) 92 | peer3Name, _ = PeerNameFromString(peer3NameString) 93 | ) 94 | 95 | // Create some peers with some connections to each other 96 | p1, ps1 := newNode(peer1Name) 97 | p2, ps2 := newNode(peer2Name) 98 | p3, ps3 := newNode(peer3Name) 99 | ps1.AddTestConnection(p2) 100 | ps2.AddTestRemoteConnection(p1, p2) 101 | ps2.AddTestConnection(p1) 102 | ps2.AddTestConnection(p3) 103 | ps3.AddTestConnection(p1) 104 | ps1.AddTestConnection(p3) 105 | ps2.AddTestRemoteConnection(p1, p3) 106 | ps2.AddTestRemoteConnection(p3, p1) 107 | 108 | // Every peer is referenced, so nothing should be dropped 109 | require.Empty(t, garbageCollect(ps1), "peers removed") 110 | require.Empty(t, garbageCollect(ps2), "peers removed") 111 | require.Empty(t, garbageCollect(ps3), "peers removed") 112 | 113 | // Drop the connection from 2 to 3, and 3 isn't garbage-collected 114 | // because 1 has a connection to 3 115 | ps2.DeleteTestConnection(p3) 116 | require.Empty(t, garbageCollect(ps2), "peers removed") 117 | 118 | // Drop the connection from 1 to 3, and 3 will get removed by 119 | // garbage-collection 120 | ps1.DeleteTestConnection(p3) 121 | checkPeerArray(t, garbageCollect(ps1), p3) 122 | } 123 | 124 | func TestShortIDCollisions(t *testing.T) { 125 | rng := rand.New(rand.NewSource(time.Now().UnixNano())) 126 | _, peers := newNode(PeerName(1 << peerShortIDBits)) 127 | 128 | // Make enough peers that short id collisions are 129 | // 
overwhelmingly likely 130 | ps := make([]*Peer, 1< entry.peer.Name) 175 | } 176 | } 177 | 178 | // Check that every peer was seen 179 | for _, n := range counts { 180 | require.Equal(t, 1, n) 181 | } 182 | 183 | // Delete all the peers 184 | shuffle() 185 | for _, p := range ps { 186 | peers.deleteByShortID(p, &pending) 187 | } 188 | 189 | for _, entry := range peers.byShortID { 190 | if entry.peer != peers.ourself.Peer { 191 | require.Nil(t, entry.peer) 192 | } 193 | 194 | require.Empty(t, entry.others) 195 | } 196 | } 197 | 198 | // Test the easy case of short id reassignment, when few short ids are taken 199 | func TestShortIDReassignmentEasy(t *testing.T) { 200 | rng := rand.New(rand.NewSource(time.Now().UnixNano())) 201 | _, peers := newNode(PeerName(0)) 202 | 203 | for i := 1; i <= 10; i++ { 204 | peers.fetchWithDefault(newPeer(PeerName(i), "", PeerUID(i), 0, 205 | PeerShortID(rng.Intn(1< minVersion { 135 | minVersion = params.MinVersion 136 | } 137 | 138 | theirMaxVersion := header[len(protocolBytes)+1] 139 | maxVersion := theirMaxVersion 140 | if maxVersion > params.MaxVersion { 141 | maxVersion = params.MaxVersion 142 | } 143 | 144 | if minVersion > maxVersion { 145 | return 0, fmt.Errorf("remote version range [%d,%d] is incompatible with ours [%d,%d]", 146 | theirMinVersion, theirMaxVersion, 147 | params.MinVersion, params.MaxVersion) 148 | } 149 | 150 | if err := <-writeDone; err != nil { 151 | return 0, err 152 | } 153 | 154 | return maxVersion, nil 155 | } 156 | 157 | // The V1 procotol consists of the protocol identification/version 158 | // header, followed by a stream of gobified values. The first value 159 | // is the encoded features map (never encrypted). The subsequent 160 | // values are the messages on the connection (encrypted for an 161 | // encrypted connection). For an encrypted connection, the public key 162 | // is passed in the "PublicKey" feature as a string of hex digits. 
163 | func (res *protocolIntroResults) doIntroV1(params protocolIntroParams, pubKey, privKey *[32]byte) error { 164 | features := filterV1Features(params.Features) 165 | if pubKey != nil { 166 | features["PublicKey"] = hex.EncodeToString(pubKey[:]) 167 | } 168 | 169 | enc := gob.NewEncoder(params.Conn) 170 | dec := gob.NewDecoder(params.Conn) 171 | 172 | // Encode in a separate goroutine to avoid the possibility of 173 | // deadlock. The result channel is of size 1 so that the 174 | // goroutine does not linger even if we encounter an error on 175 | // the read side. 176 | encodeDone := make(chan error, 1) 177 | go func() { 178 | encodeDone <- enc.Encode(features) 179 | }() 180 | 181 | if err := dec.Decode(&res.Features); err != nil { 182 | return err 183 | } 184 | 185 | if err := <-encodeDone; err != nil { 186 | return err 187 | } 188 | 189 | res.Sender = newGobTCPSender(enc) 190 | res.Receiver = newGobTCPReceiver(dec) 191 | 192 | if pubKey == nil { 193 | if _, present := res.Features["PublicKey"]; present { 194 | return errExpectedNoCrypto 195 | } 196 | } else { 197 | remotePubKeyStr, ok := res.Features["PublicKey"] 198 | if !ok { 199 | return errExpectedCrypto 200 | } 201 | 202 | remotePubKey, err := hex.DecodeString(remotePubKeyStr) 203 | if err != nil { 204 | return err 205 | } 206 | 207 | res.setupCrypto(params, remotePubKey, privKey) 208 | } 209 | 210 | res.Features = filterV1Features(res.Features) 211 | return nil 212 | } 213 | 214 | // In the V1 protocol, the intro fields are sent unencrypted. So we 215 | // restrict them to an established subset of fields that are assumed 216 | // to be safe. 
217 | func filterV1Features(intro map[string]string) map[string]string { 218 | safe := make(map[string]string) 219 | for _, k := range protocolV1Features { 220 | if val, ok := intro[k]; ok { 221 | safe[k] = val 222 | } 223 | } 224 | 225 | return safe 226 | } 227 | 228 | // The V2 procotol consists of the protocol identification/version 229 | // header, followed by: 230 | // 231 | // - A single "encryption flag" byte: 0 for no encryption, 1 for 232 | // encryption. 233 | // 234 | // - When the connection is encrypted, 32 bytes follow containing the 235 | // public key. 236 | // 237 | // - Then a stream of length-prefixed messages, which are encrypted 238 | // for an encrypted connection. 239 | // 240 | // The first message contains the encoded features map (so in contrast 241 | // to V1, it will be encrypted on an encrypted connection). 242 | func (res *protocolIntroResults) doIntroV2(params protocolIntroParams, pubKey, privKey *[32]byte) error { 243 | // Public key exchange 244 | var wbuf []byte 245 | if pubKey == nil { 246 | wbuf = []byte{0} 247 | } else { 248 | wbuf = make([]byte, 1+len(*pubKey)) 249 | wbuf[0] = 1 250 | copy(wbuf[1:], (*pubKey)[:]) 251 | } 252 | 253 | // Write in a separate goroutine to avoid the possibility of 254 | // deadlock. The result channel is of size 1 so that the 255 | // goroutine does not linger even if we encounter an error on 256 | // the read side. 
257 | writeDone := make(chan error, 1) 258 | go func() { 259 | _, err := params.Conn.Write(wbuf) 260 | writeDone <- err 261 | }() 262 | 263 | rbuf := make([]byte, 1) 264 | if _, err := io.ReadFull(params.Conn, rbuf); err != nil { 265 | return err 266 | } 267 | 268 | switch rbuf[0] { 269 | case 0: 270 | if pubKey != nil { 271 | return errExpectedCrypto 272 | } 273 | 274 | res.Sender = newLengthPrefixTCPSender(params.Conn) 275 | res.Receiver = newLengthPrefixTCPReceiver(params.Conn) 276 | 277 | case 1: 278 | if pubKey == nil { 279 | return errExpectedNoCrypto 280 | } 281 | 282 | rbuf = make([]byte, len(pubKey)) 283 | if _, err := io.ReadFull(params.Conn, rbuf); err != nil { 284 | return err 285 | } 286 | 287 | res.Sender = newLengthPrefixTCPSender(params.Conn) 288 | res.Receiver = newLengthPrefixTCPReceiver(params.Conn) 289 | res.setupCrypto(params, rbuf, privKey) 290 | 291 | default: 292 | return fmt.Errorf("Bad encryption flag %d", rbuf[0]) 293 | } 294 | 295 | if err := <-writeDone; err != nil { 296 | return err 297 | } 298 | 299 | // Features exchange 300 | go func() { 301 | buf := new(bytes.Buffer) 302 | if err := gob.NewEncoder(buf).Encode(¶ms.Features); err != nil { 303 | writeDone <- err 304 | return 305 | } 306 | 307 | writeDone <- res.Sender.Send(buf.Bytes()) 308 | }() 309 | 310 | rbuf, err := res.Receiver.Receive() 311 | if err != nil { 312 | return err 313 | } 314 | 315 | if err := gob.NewDecoder(bytes.NewReader(rbuf)).Decode(&res.Features); err != nil { 316 | return err 317 | } 318 | 319 | if err := <-writeDone; err != nil { 320 | return err 321 | } 322 | 323 | return nil 324 | } 325 | 326 | func (res *protocolIntroResults) setupCrypto(params protocolIntroParams, remotePubKey []byte, privKey *[32]byte) { 327 | var remotePubKeyArr [32]byte 328 | copy(remotePubKeyArr[:], remotePubKey) 329 | res.SessionKey = formSessionKey(&remotePubKeyArr, privKey, params.Password) 330 | res.Sender = newEncryptedTCPSender(res.Sender, res.SessionKey, params.Outbound) 331 | 
res.Receiver = newEncryptedTCPReceiver(res.Receiver, res.SessionKey, params.Outbound) 332 | } 333 | 334 | // ProtocolTag identifies the type of msg encoded in a ProtocolMsg. 335 | type protocolTag byte 336 | 337 | const ( 338 | // ProtocolHeartbeat identifies a heartbeat msg. 339 | ProtocolHeartbeat = iota 340 | // ProtocolReserved1 is a legacy overly control message. 341 | ProtocolReserved1 342 | // ProtocolReserved2 is a legacy overly control message. 343 | ProtocolReserved2 344 | // ProtocolReserved3 is a legacy overly control message. 345 | ProtocolReserved3 346 | // ProtocolGossip identifies a pure gossip msg. 347 | ProtocolGossip 348 | // ProtocolGossipUnicast identifies a gossip (unicast) msg. 349 | ProtocolGossipUnicast 350 | // ProtocolGossipBroadcast identifies a gossip (broadcast) msg. 351 | ProtocolGossipBroadcast 352 | // ProtocolOverlayControlMsg identifies a control msg. 353 | ProtocolOverlayControlMsg 354 | ) 355 | 356 | // ProtocolMsg combines a tag and encoded msg. 357 | type protocolMsg struct { 358 | tag protocolTag 359 | msg []byte 360 | } 361 | 362 | type protocolSender interface { 363 | SendProtocolMsg(m protocolMsg) error 364 | } 365 | -------------------------------------------------------------------------------- /protocol_crypto.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "crypto/rand" 5 | "crypto/sha256" 6 | "encoding/binary" 7 | "encoding/gob" 8 | "fmt" 9 | "io" 10 | "sync" 11 | 12 | "golang.org/x/crypto/nacl/box" 13 | "golang.org/x/crypto/nacl/secretbox" 14 | ) 15 | 16 | // MaxTCPMsgSize is the hard limit on sends and receives. Larger messages will 17 | // result in errors. This applies to the LengthPrefixTCP{Sender,Receiver} i.e. 18 | // V2 of the protocol. 19 | const maxTCPMsgSize = 10 * 1024 * 1024 20 | 21 | // GenerateKeyPair is used during encrypted protocol introduction. 
22 | func generateKeyPair() (publicKey, privateKey *[32]byte, err error) { 23 | return box.GenerateKey(rand.Reader) 24 | } 25 | 26 | // FormSessionKey is used during encrypted protocol introduction. 27 | func formSessionKey(remotePublicKey, localPrivateKey *[32]byte, secretKey []byte) *[32]byte { 28 | var sharedKey [32]byte 29 | box.Precompute(&sharedKey, remotePublicKey, localPrivateKey) 30 | sharedKeySlice := sharedKey[:] 31 | sharedKeySlice = append(sharedKeySlice, secretKey...) 32 | sessionKey := sha256.Sum256(sharedKeySlice) 33 | return &sessionKey 34 | } 35 | 36 | // TCP Senders/Receivers 37 | 38 | // TCPCryptoState stores session key, nonce, and sequence state. 39 | // 40 | // The lowest 64 bits of the nonce contain the message sequence number. The 41 | // top most bit indicates the connection polarity at the sender - '1' for 42 | // outbound; the next indicates protocol type - '1' for TCP. The remaining 126 43 | // bits are zero. The polarity is needed so that the two ends of a connection 44 | // do not use the same nonces; the protocol type so that the TCP connection 45 | // nonces are distinct from nonces used by overlay connections, if they share 46 | // the session key. This is a requirement of the NaCl Security Model; see 47 | // http://nacl.cr.yp.to/box.html. 48 | type tcpCryptoState struct { 49 | sessionKey *[32]byte 50 | nonce [24]byte 51 | seqNo uint64 52 | } 53 | 54 | // NewTCPCryptoState returns a valid TCPCryptoState. 55 | func newTCPCryptoState(sessionKey *[32]byte, outbound bool) *tcpCryptoState { 56 | s := &tcpCryptoState{sessionKey: sessionKey} 57 | if outbound { 58 | s.nonce[0] |= (1 << 7) 59 | } 60 | s.nonce[0] |= (1 << 6) 61 | return s 62 | } 63 | 64 | func (s *tcpCryptoState) advance() { 65 | s.seqNo++ 66 | binary.BigEndian.PutUint64(s.nonce[16:24], s.seqNo) 67 | } 68 | 69 | // TCPSender describes anything that can send byte buffers. 70 | // It abstracts over the different protocol version senders. 
71 | type tcpSender interface { 72 | Send([]byte) error 73 | } 74 | 75 | // GobTCPSender implements TCPSender and is used in the V1 protocol. 76 | type gobTCPSender struct { 77 | encoder *gob.Encoder 78 | } 79 | 80 | func newGobTCPSender(encoder *gob.Encoder) *gobTCPSender { 81 | return &gobTCPSender{encoder: encoder} 82 | } 83 | 84 | // Send implements TCPSender by encoding the msg. 85 | func (sender *gobTCPSender) Send(msg []byte) error { 86 | return sender.encoder.Encode(msg) 87 | } 88 | 89 | // LengthPrefixTCPSender implements TCPSender and is used in the V2 protocol. 90 | type lengthPrefixTCPSender struct { 91 | writer io.Writer 92 | } 93 | 94 | func newLengthPrefixTCPSender(writer io.Writer) *lengthPrefixTCPSender { 95 | return &lengthPrefixTCPSender{writer: writer} 96 | } 97 | 98 | // Send implements TCPSender by writing the size of the msg as a big-endian 99 | // uint32 before the msg. msgs larger than MaxTCPMsgSize are rejected. 100 | func (sender *lengthPrefixTCPSender) Send(msg []byte) error { 101 | l := len(msg) 102 | if l > maxTCPMsgSize { 103 | return fmt.Errorf("outgoing message exceeds maximum size: %d > %d", l, maxTCPMsgSize) 104 | } 105 | // We copy the message so we can send it in a single Write 106 | // operation, thus making this thread-safe without locking. 107 | prefixedMsg := make([]byte, 4+l) 108 | binary.BigEndian.PutUint32(prefixedMsg, uint32(l)) 109 | copy(prefixedMsg[4:], msg) 110 | _, err := sender.writer.Write(prefixedMsg) 111 | return err 112 | } 113 | 114 | // Implement TCPSender by wrapping an existing TCPSender with tcpCryptoState. 
115 | type encryptedTCPSender struct { 116 | sync.RWMutex 117 | sender tcpSender 118 | state *tcpCryptoState 119 | } 120 | 121 | func newEncryptedTCPSender(sender tcpSender, sessionKey *[32]byte, outbound bool) *encryptedTCPSender { 122 | return &encryptedTCPSender{sender: sender, state: newTCPCryptoState(sessionKey, outbound)} 123 | } 124 | 125 | // Send implements TCPSender by sealing and sending the msg as-is. 126 | func (sender *encryptedTCPSender) Send(msg []byte) error { 127 | sender.Lock() 128 | defer sender.Unlock() 129 | encodedMsg := secretbox.Seal(nil, msg, &sender.state.nonce, sender.state.sessionKey) 130 | sender.state.advance() 131 | return sender.sender.Send(encodedMsg) 132 | } 133 | 134 | // tcpReceiver describes anything that can receive byte buffers. 135 | // It abstracts over the different protocol version receivers. 136 | type tcpReceiver interface { 137 | Receive() ([]byte, error) 138 | } 139 | 140 | // gobTCPReceiver implements TCPReceiver and is used in the V1 protocol. 141 | type gobTCPReceiver struct { 142 | decoder *gob.Decoder 143 | } 144 | 145 | func newGobTCPReceiver(decoder *gob.Decoder) *gobTCPReceiver { 146 | return &gobTCPReceiver{decoder: decoder} 147 | } 148 | 149 | // Receive implements TCPReciever by Gob decoding into a byte slice directly. 150 | func (receiver *gobTCPReceiver) Receive() ([]byte, error) { 151 | var msg []byte 152 | err := receiver.decoder.Decode(&msg) 153 | return msg, err 154 | } 155 | 156 | // lengthPrefixTCPReceiver implements TCPReceiver, used in the V2 protocol. 157 | type lengthPrefixTCPReceiver struct { 158 | reader io.Reader 159 | } 160 | 161 | func newLengthPrefixTCPReceiver(reader io.Reader) *lengthPrefixTCPReceiver { 162 | return &lengthPrefixTCPReceiver{reader: reader} 163 | } 164 | 165 | // Receive implements TCPReceiver by making a length-limited read into a byte buffer. 
166 | func (receiver *lengthPrefixTCPReceiver) Receive() ([]byte, error) { 167 | lenPrefix := make([]byte, 4) 168 | if _, err := io.ReadFull(receiver.reader, lenPrefix); err != nil { 169 | return nil, err 170 | } 171 | l := binary.BigEndian.Uint32(lenPrefix) 172 | if l > maxTCPMsgSize { 173 | return nil, fmt.Errorf("incoming message exceeds maximum size: %d > %d", l, maxTCPMsgSize) 174 | } 175 | msg := make([]byte, l) 176 | _, err := io.ReadFull(receiver.reader, msg) 177 | return msg, err 178 | } 179 | 180 | // encryptedTCPReceiver implements TCPReceiver by wrapping a TCPReceiver with TCPCryptoState. 181 | type encryptedTCPReceiver struct { 182 | receiver tcpReceiver 183 | state *tcpCryptoState 184 | } 185 | 186 | func newEncryptedTCPReceiver(receiver tcpReceiver, sessionKey *[32]byte, outbound bool) *encryptedTCPReceiver { 187 | return &encryptedTCPReceiver{receiver: receiver, state: newTCPCryptoState(sessionKey, !outbound)} 188 | } 189 | 190 | // Receive implements TCPReceiver by reading from the wrapped TCPReceiver and 191 | // unboxing the encrypted message, returning the decoded message. 
192 | func (receiver *encryptedTCPReceiver) Receive() ([]byte, error) { 193 | msg, err := receiver.receiver.Receive() 194 | if err != nil { 195 | return nil, err 196 | } 197 | 198 | decodedMsg, success := secretbox.Open(nil, msg, &receiver.state.nonce, receiver.state.sessionKey) 199 | if !success { 200 | return nil, fmt.Errorf("Unable to decrypt TCP msg") 201 | } 202 | 203 | receiver.state.advance() 204 | return decodedMsg, nil 205 | } 206 | -------------------------------------------------------------------------------- /protocol_crypto_test.go: -------------------------------------------------------------------------------- 1 | package mesh_test 2 | 3 | import "testing" 4 | 5 | func TestGobTCPSenderReceiver(t *testing.T) { 6 | t.Skip("TODO") 7 | } 8 | 9 | func TestLengthPrefixTCPSenderReceiver(t *testing.T) { 10 | t.Skip("TODO") 11 | } 12 | 13 | func TestEncryptedTCPSenderReceiver(t *testing.T) { 14 | t.Skip("TODO") 15 | } 16 | -------------------------------------------------------------------------------- /protocol_test.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "io" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | type testConn struct { 12 | io.Writer 13 | io.Reader 14 | } 15 | 16 | func (testConn) SetDeadline(t time.Time) error { 17 | return nil 18 | } 19 | 20 | func (testConn) SetReadDeadline(t time.Time) error { 21 | return nil 22 | } 23 | 24 | func (testConn) SetWriteDeadline(t time.Time) error { 25 | return nil 26 | } 27 | 28 | func connPair() (protocolIntroConn, protocolIntroConn) { 29 | a := testConn{} 30 | b := testConn{} 31 | a.Reader, b.Writer = io.Pipe() 32 | b.Reader, a.Writer = io.Pipe() 33 | return &a, &b 34 | } 35 | 36 | func doIntro(t *testing.T, params protocolIntroParams) <-chan protocolIntroResults { 37 | ch := make(chan protocolIntroResults, 1) 38 | go func() { 39 | res, err := params.doIntro() 40 | require.Nil(t, err) 41 | 
ch <- res 42 | }() 43 | return ch 44 | } 45 | 46 | func doProtocolIntro(t *testing.T, aver, bver byte, password []byte) byte { 47 | aconn, bconn := connPair() 48 | aresch := doIntro(t, protocolIntroParams{ 49 | MinVersion: ProtocolMinVersion, 50 | MaxVersion: aver, 51 | Features: map[string]string{"Name": "A"}, 52 | Conn: aconn, 53 | Outbound: true, 54 | Password: password, 55 | }) 56 | bresch := doIntro(t, protocolIntroParams{ 57 | MinVersion: ProtocolMinVersion, 58 | MaxVersion: bver, 59 | Features: map[string]string{"Name": "B"}, 60 | Conn: bconn, 61 | Outbound: false, 62 | Password: password, 63 | }) 64 | ares := <-aresch 65 | bres := <-bresch 66 | 67 | // Check that features were conveyed 68 | require.Equal(t, "B", ares.Features["Name"]) 69 | require.Equal(t, "A", bres.Features["Name"]) 70 | 71 | // Check that Senders and Receivers work 72 | go func() { 73 | require.Nil(t, ares.Sender.Send([]byte("Hello from A"))) 74 | require.Nil(t, bres.Sender.Send([]byte("Hello from B"))) 75 | }() 76 | 77 | data, err := bres.Receiver.Receive() 78 | require.Nil(t, err) 79 | require.Equal(t, "Hello from A", string(data)) 80 | 81 | data, err = ares.Receiver.Receive() 82 | require.Nil(t, err) 83 | require.Equal(t, "Hello from B", string(data)) 84 | 85 | require.Equal(t, ares.Version, bres.Version) 86 | return ares.Version 87 | } 88 | 89 | func TestProtocolIntro(t *testing.T) { 90 | require.Equal(t, 2, int(doProtocolIntro(t, 2, 2, nil))) 91 | require.Equal(t, 2, int(doProtocolIntro(t, 2, 2, []byte("sekr1t")))) 92 | require.Equal(t, 1, int(doProtocolIntro(t, 1, 2, nil))) 93 | require.Equal(t, 1, int(doProtocolIntro(t, 1, 2, []byte("pa55")))) 94 | require.Equal(t, 1, int(doProtocolIntro(t, 2, 1, nil))) 95 | require.Equal(t, 1, int(doProtocolIntro(t, 2, 1, []byte("w0rd")))) 96 | } 97 | -------------------------------------------------------------------------------- /router.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | 
import (
	"bytes"
	"encoding/gob"
	"fmt"
	"math"
	"net"
	"sync"
	"time"
)

var (
	// Port is the port used for all mesh communication.
	Port = 6783

	// ChannelSize is the buffer size used by so-called actor goroutines
	// throughout mesh.
	ChannelSize = 16

	// defaultGossipInterval is used when Config.GossipInterval is nil.
	defaultGossipInterval = 30 * time.Second
)

const (
	tcpHeartbeat = 30 * time.Second
	maxDuration  = time.Duration(math.MaxInt64)
	// acceptMaxTokens and acceptTokenDelay parameterise the token bucket
	// that the TCP accept loop waits on after each accepted connection.
	acceptMaxTokens  = 20
	acceptTokenDelay = 50 * time.Millisecond
)

// Config defines dimensions of configuration for the router.
// TODO(pb): provide usable defaults in NewRouter
type Config struct {
	// Host and Port form the TCP address the router listens on.
	Host string
	Port int
	// Password, when non-nil, means a password is in use (see usingPassword).
	Password           []byte
	ConnLimit          int
	ProtocolMinVersion byte
	// PeerDiscovery is handed to the connection maker in NewRouter.
	PeerDiscovery bool
	// TrustedSubnets lists the networks whose remote addresses are trusted
	// (see trusts).
	TrustedSubnets []*net.IPNet
	// GossipInterval overrides defaultGossipInterval when non-nil.
	GossipInterval *time.Duration
}

// Router manages communication between this peer and the rest of the mesh.
// Router implements Gossiper.
type Router struct {
	Config
	Overlay         Overlay
	Ourself         *localPeer
	Peers           *Peers
	Routes          *routes
	ConnectionMaker *connectionMaker
	// gossipLock guards gossipChannels.
	gossipLock     sync.RWMutex
	gossipChannels gossipChannels
	topologyGossip Gossip
	acceptLimiter  *tokenBucket
	logger         Logger
}

// NewRouter returns a new router. It must be started.
61 | func NewRouter(config Config, name PeerName, nickName string, overlay Overlay, logger Logger) (*Router, error) { 62 | router := &Router{Config: config, gossipChannels: make(gossipChannels)} 63 | 64 | if overlay == nil { 65 | overlay = NullOverlay{} 66 | } 67 | 68 | router.Overlay = overlay 69 | router.Ourself = newLocalPeer(name, nickName, router) 70 | router.Peers = newPeers(router.Ourself) 71 | router.Peers.OnGC(func(peer *Peer) { 72 | logger.Printf("Removed unreachable peer %s", peer) 73 | }) 74 | router.Routes = newRoutes(router.Ourself, router.Peers) 75 | router.ConnectionMaker = newConnectionMaker(router.Ourself, router.Peers, net.JoinHostPort(router.Host, "0"), router.Port, router.PeerDiscovery, logger) 76 | router.logger = logger 77 | gossip, err := router.NewGossip("topology", router) 78 | if err != nil { 79 | return nil, err 80 | } 81 | router.topologyGossip = gossip 82 | router.acceptLimiter = newTokenBucket(acceptMaxTokens, acceptTokenDelay) 83 | return router, nil 84 | } 85 | 86 | // Start listening for TCP connections. This is separate from NewRouter so 87 | // that gossipers can register before we start forming connections. 88 | func (router *Router) Start() { 89 | router.listenTCP() 90 | } 91 | 92 | // Stop shuts down the router. 93 | func (router *Router) Stop() error { 94 | router.Overlay.Stop() 95 | // TODO: perform more graceful shutdown... 
96 | return nil 97 | } 98 | 99 | func (router *Router) usingPassword() bool { 100 | return router.Password != nil 101 | } 102 | 103 | func (router *Router) listenTCP() { 104 | localAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(router.Host, fmt.Sprint(router.Port))) 105 | if err != nil { 106 | panic(err) 107 | } 108 | ln, err := net.ListenTCP("tcp", localAddr) 109 | if err != nil { 110 | panic(err) 111 | } 112 | go func() { 113 | defer ln.Close() 114 | for { 115 | tcpConn, err := ln.AcceptTCP() 116 | if err != nil { 117 | router.logger.Printf("%v", err) 118 | continue 119 | } 120 | router.acceptTCP(tcpConn) 121 | router.acceptLimiter.wait() 122 | } 123 | }() 124 | } 125 | 126 | func (router *Router) acceptTCP(tcpConn *net.TCPConn) { 127 | remoteAddrStr := tcpConn.RemoteAddr().String() 128 | router.logger.Printf("->[%s] connection accepted", remoteAddrStr) 129 | connRemote := newRemoteConnection(router.Ourself.Peer, nil, remoteAddrStr, false, false) 130 | startLocalConnection(connRemote, tcpConn, router, true, router.logger) 131 | } 132 | 133 | // NewGossip returns a usable GossipChannel from the router. 134 | // 135 | // TODO(pb): rename? 
136 | func (router *Router) NewGossip(channelName string, g Gossiper) (Gossip, error) { 137 | channel := newGossipChannel(channelName, router.Ourself, router.Routes, g, router.logger) 138 | router.gossipLock.Lock() 139 | defer router.gossipLock.Unlock() 140 | if _, found := router.gossipChannels[channelName]; found { 141 | return nil, fmt.Errorf("[gossip] duplicate channel %s", channelName) 142 | } 143 | router.gossipChannels[channelName] = channel 144 | return channel, nil 145 | } 146 | 147 | func (router *Router) gossipChannel(channelName string) *gossipChannel { 148 | router.gossipLock.RLock() 149 | channel, found := router.gossipChannels[channelName] 150 | router.gossipLock.RUnlock() 151 | if found { 152 | return channel 153 | } 154 | router.gossipLock.Lock() 155 | defer router.gossipLock.Unlock() 156 | if channel, found = router.gossipChannels[channelName]; found { 157 | return channel 158 | } 159 | channel = newGossipChannel(channelName, router.Ourself, router.Routes, &surrogateGossiper{router: router}, router.logger) 160 | channel.logf("created surrogate channel") 161 | router.gossipChannels[channelName] = channel 162 | return channel 163 | } 164 | 165 | func (router *Router) gossipChannelSet() map[*gossipChannel]struct{} { 166 | channels := make(map[*gossipChannel]struct{}) 167 | router.gossipLock.RLock() 168 | defer router.gossipLock.RUnlock() 169 | for _, channel := range router.gossipChannels { 170 | channels[channel] = struct{}{} 171 | } 172 | return channels 173 | } 174 | 175 | func (router *Router) gossipInterval() time.Duration { 176 | if router.Config.GossipInterval != nil { 177 | return *router.Config.GossipInterval 178 | } else { 179 | return defaultGossipInterval 180 | } 181 | } 182 | 183 | func (router *Router) handleGossip(tag protocolTag, payload []byte) error { 184 | decoder := gob.NewDecoder(bytes.NewReader(payload)) 185 | var channelName string 186 | if err := decoder.Decode(&channelName); err != nil { 187 | return err 188 | } 189 | channel 
:= router.gossipChannel(channelName) 190 | var srcName PeerName 191 | if err := decoder.Decode(&srcName); err != nil { 192 | return err 193 | } 194 | switch tag { 195 | case ProtocolGossipUnicast: 196 | return channel.deliverUnicast(srcName, payload, decoder) 197 | case ProtocolGossipBroadcast: 198 | return channel.deliverBroadcast(srcName, payload, decoder) 199 | case ProtocolGossip: 200 | return channel.deliver(srcName, payload, decoder) 201 | } 202 | return nil 203 | } 204 | 205 | // Relay all pending gossip data for each channel via random neighbours. 206 | func (router *Router) sendAllGossip() { 207 | for channel := range router.gossipChannelSet() { 208 | if gossip := channel.gossiper.Gossip(); gossip != nil { 209 | channel.Send(gossip) 210 | } 211 | } 212 | } 213 | 214 | // Relay all pending gossip data for each channel via conn. 215 | func (router *Router) sendAllGossipDown(conn Connection) { 216 | for channel := range router.gossipChannelSet() { 217 | if gossip := channel.gossiper.Gossip(); gossip != nil { 218 | channel.SendDown(conn, gossip) 219 | } 220 | } 221 | } 222 | 223 | // for testing 224 | func (router *Router) sendPendingGossip() bool { 225 | sentSomething := false 226 | for conn := range router.Ourself.getConnections() { 227 | sentSomething = conn.(gossipConnection).gossipSenders().Flush() || sentSomething 228 | } 229 | return sentSomething 230 | } 231 | 232 | // BroadcastTopologyUpdate is invoked whenever there is a change to the mesh 233 | // topology, and broadcasts the new set of peers to the mesh. 234 | func (router *Router) broadcastTopologyUpdate(update peerNameSet) { 235 | gossipData := &topologyGossipData{peers: router.Peers, update: update} 236 | router.topologyGossip.GossipNeighbourSubset(gossipData) 237 | } 238 | 239 | // OnGossipUnicast implements Gossiper, but always returns an error, as a 240 | // router should only receive gossip broadcasts of TopologyGossipData. 
241 | func (router *Router) OnGossipUnicast(sender PeerName, msg []byte) error { 242 | return fmt.Errorf("unexpected topology gossip unicast: %v", msg) 243 | } 244 | 245 | // OnGossipBroadcast receives broadcasts of TopologyGossipData. 246 | // It returns the received update unchanged. 247 | func (router *Router) OnGossipBroadcast(_ PeerName, update []byte) (GossipData, error) { 248 | origUpdate, _, err := router.applyTopologyUpdate(update) 249 | if err != nil || len(origUpdate) == 0 { 250 | return nil, err 251 | } 252 | return &topologyGossipData{peers: router.Peers, update: origUpdate}, nil 253 | } 254 | 255 | // Gossip yields the current topology as GossipData. 256 | func (router *Router) Gossip() GossipData { 257 | return &topologyGossipData{peers: router.Peers, update: router.Peers.names()} 258 | } 259 | 260 | // OnGossip receives broadcasts of TopologyGossipData. 261 | // It returns an "improved" version of the received update. 262 | // See peers.ApplyUpdate. 263 | func (router *Router) OnGossip(update []byte) (GossipData, error) { 264 | _, newUpdate, err := router.applyTopologyUpdate(update) 265 | if err != nil || len(newUpdate) == 0 { 266 | return nil, err 267 | } 268 | return &topologyGossipData{peers: router.Peers, update: newUpdate}, nil 269 | } 270 | 271 | func (router *Router) applyTopologyUpdate(update []byte) (peerNameSet, peerNameSet, error) { 272 | origUpdate, newUpdate, err := router.Peers.applyUpdate(update) 273 | if err != nil { 274 | return nil, nil, err 275 | } 276 | if len(newUpdate) > 0 { 277 | router.ConnectionMaker.refresh() 278 | router.Routes.recalculate() 279 | } 280 | return origUpdate, newUpdate, nil 281 | } 282 | 283 | func (router *Router) trusts(remote *remoteConnection) bool { 284 | if tcpAddr, err := net.ResolveTCPAddr("tcp", remote.remoteTCPAddr); err == nil { 285 | for _, trustedSubnet := range router.TrustedSubnets { 286 | if trustedSubnet.Contains(tcpAddr.IP) { 287 | return true 288 | } 289 | } 290 | } else { 291 | // Should 
not happen as remoteTCPAddr was obtained from TCPConn 292 | router.logger.Printf("Unable to parse remote TCP addr: %s", err) 293 | } 294 | return false 295 | } 296 | 297 | // The set of peers in the mesh network. 298 | // Gossiped just like anything else. 299 | type topologyGossipData struct { 300 | peers *Peers 301 | update peerNameSet 302 | } 303 | 304 | // Merge implements GossipData. 305 | func (d *topologyGossipData) Merge(other GossipData) GossipData { 306 | names := make(peerNameSet) 307 | for name := range d.update { 308 | names[name] = struct{}{} 309 | } 310 | for name := range other.(*topologyGossipData).update { 311 | names[name] = struct{}{} 312 | } 313 | return &topologyGossipData{peers: d.peers, update: names} 314 | } 315 | 316 | // Encode implements GossipData. 317 | func (d *topologyGossipData) Encode() [][]byte { 318 | return [][]byte{d.peers.encodePeers(d.update)} 319 | } 320 | -------------------------------------------------------------------------------- /routes.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | type unicastRoutes map[PeerName]PeerName 11 | type broadcastRoutes map[PeerName][]PeerName 12 | 13 | // routes aggregates unicast and broadcast routes for our peer. 14 | type routes struct { 15 | sync.RWMutex 16 | ourself *localPeer 17 | peers *Peers 18 | onChange []func() 19 | unicast unicastRoutes 20 | unicastAll unicastRoutes // [1] 21 | broadcast broadcastRoutes 22 | broadcastAll broadcastRoutes // [1] 23 | recalcTimer *time.Timer 24 | pendingRecalc bool 25 | wait chan chan struct{} 26 | action chan<- func() 27 | // [1] based on *all* connections, not just established & 28 | // symmetric ones 29 | } 30 | 31 | const ( 32 | // We defer recalculation requests by up to 100ms, in order to 33 | // coalesce multiple recalcs together. 
34 | recalcDeferTime = 100 * time.Millisecond 35 | ) 36 | 37 | // newRoutes returns a usable Routes based on the LocalPeer and existing Peers. 38 | func newRoutes(ourself *localPeer, peers *Peers) *routes { 39 | wait := make(chan chan struct{}) 40 | action := make(chan func()) 41 | r := &routes{ 42 | ourself: ourself, 43 | peers: peers, 44 | unicast: unicastRoutes{ourself.Name: UnknownPeerName}, 45 | unicastAll: unicastRoutes{ourself.Name: UnknownPeerName}, 46 | broadcast: broadcastRoutes{ourself.Name: []PeerName{}}, 47 | broadcastAll: broadcastRoutes{ourself.Name: []PeerName{}}, 48 | recalcTimer: time.NewTimer(time.Hour), 49 | wait: wait, 50 | action: action, 51 | } 52 | r.recalcTimer.Stop() 53 | go r.run(wait, action) 54 | return r 55 | } 56 | 57 | // OnChange appends callback to the functions that will be called whenever the 58 | // routes are recalculated. 59 | func (r *routes) OnChange(callback func()) { 60 | r.Lock() 61 | defer r.Unlock() 62 | r.onChange = append(r.onChange, callback) 63 | } 64 | 65 | // PeerNames returns the peers that are accountd for in the r. 66 | func (r *routes) PeerNames() peerNameSet { 67 | return r.peers.names() 68 | } 69 | 70 | // Unicast returns the next hop on the unicast route to the named peer, 71 | // based on established and symmetric connections. 72 | func (r *routes) Unicast(name PeerName) (PeerName, bool) { 73 | r.RLock() 74 | defer r.RUnlock() 75 | hop, found := r.unicast[name] 76 | return hop, found 77 | } 78 | 79 | // UnicastAll returns the next hop on the unicast route to the named peer, 80 | // based on all connections. 81 | func (r *routes) UnicastAll(name PeerName) (PeerName, bool) { 82 | r.RLock() 83 | defer r.RUnlock() 84 | hop, found := r.unicastAll[name] 85 | return hop, found 86 | } 87 | 88 | // Broadcast returns the set of peer names that should be notified 89 | // when we receive a broadcast message originating from the named peer 90 | // based on established and symmetric connections. 
91 | func (r *routes) Broadcast(name PeerName) []PeerName { 92 | return r.lookupOrCalculate(name, &r.broadcast, true) 93 | } 94 | 95 | // BroadcastAll returns the set of peer names that should be notified 96 | // when we receive a broadcast message originating from the named peer 97 | // based on all connections. 98 | func (r *routes) BroadcastAll(name PeerName) []PeerName { 99 | return r.lookupOrCalculate(name, &r.broadcastAll, false) 100 | } 101 | 102 | func (r *routes) lookupOrCalculate(name PeerName, broadcast *broadcastRoutes, establishedAndSymmetric bool) []PeerName { 103 | r.RLock() 104 | hops, found := (*broadcast)[name] 105 | r.RUnlock() 106 | if found { 107 | return hops 108 | } 109 | res := make(chan []PeerName) 110 | r.action <- func() { 111 | r.RLock() 112 | hops, found := (*broadcast)[name] 113 | r.RUnlock() 114 | if found { 115 | res <- hops 116 | return 117 | } 118 | r.peers.RLock() 119 | r.ourself.RLock() 120 | hops = r.calculateBroadcast(name, establishedAndSymmetric) 121 | r.ourself.RUnlock() 122 | r.peers.RUnlock() 123 | res <- hops 124 | r.Lock() 125 | (*broadcast)[name] = hops 126 | r.Unlock() 127 | } 128 | return <-res 129 | } 130 | 131 | // RandomNeighbours chooses min(2 log2(n_peers), n_neighbouring_peers) 132 | // neighbours, with a random distribution that is topology-sensitive, 133 | // favouring neighbours at the end of "bottleneck links". We determine the 134 | // latter based on the unicast routing table. If a neighbour appears as the 135 | // value more frequently than others - meaning that we reach a higher 136 | // proportion of peers via that neighbour than other neighbours - then it is 137 | // chosen with a higher probability. 138 | // 139 | // Note that we choose 2log2(n_peers) *neighbours*, not peers. Consequently, on 140 | // sparsely connected peers this function returns a higher proportion of 141 | // neighbours than elsewhere. 
In extremis, on peers with fewer than 142 | // log2(n_peers) neighbours, all neighbours are returned. 143 | func (r *routes) randomNeighbours(except PeerName) []PeerName { 144 | r.RLock() 145 | defer r.RUnlock() 146 | var total int64 = 0 147 | weights := make(map[PeerName]int64) 148 | // First iterate the whole set, counting how often each neighbour appears 149 | for _, dst := range r.unicastAll { 150 | if dst != UnknownPeerName && dst != except { 151 | total++ 152 | weights[dst]++ 153 | } 154 | } 155 | needed := int(math.Min(2*math.Log2(float64(len(r.unicastAll))), float64(len(weights)))) 156 | destinations := make([]PeerName, 0, needed) 157 | for len(destinations) < needed { 158 | // Pick a random point on the distribution and linear search for it 159 | rnd := rand.Int63n(total) 160 | for dst, count := range weights { 161 | if rnd < count { 162 | destinations = append(destinations, dst) 163 | // Remove the one we selected from consideration 164 | delete(weights, dst) 165 | total -= count 166 | break 167 | } 168 | rnd -= count 169 | } 170 | } 171 | return destinations 172 | } 173 | 174 | // Recalculate requests recalculation of the routing table. This is async but 175 | // can effectively be made synchronous with a subsequent call to 176 | // EnsureRecalculated. 177 | func (r *routes) recalculate() { 178 | r.Lock() 179 | if !r.pendingRecalc { 180 | r.recalcTimer.Reset(recalcDeferTime) 181 | r.pendingRecalc = true 182 | } 183 | r.Unlock() 184 | } 185 | 186 | func (r *routes) clearPendingRecalcFlag() { 187 | r.Lock() 188 | r.pendingRecalc = false 189 | r.Unlock() 190 | } 191 | 192 | // EnsureRecalculated waits for any preceding Recalculate requests to finish. 
193 | func (r *routes) ensureRecalculated() { 194 | var done chan struct{} 195 | // If another call is already waiting, wait on the same chan, otherwise make a new one 196 | select { 197 | case done = <-r.wait: 198 | default: 199 | done = make(chan struct{}) 200 | } 201 | r.wait <- done 202 | <-done 203 | } 204 | 205 | func (r *routes) run(wait <-chan chan struct{}, action <-chan func()) { 206 | for { 207 | select { 208 | case <-r.recalcTimer.C: 209 | r.clearPendingRecalcFlag() 210 | r.calculate() 211 | case done := <-wait: 212 | r.Lock() 213 | pending := r.pendingRecalc 214 | r.Unlock() 215 | if pending { 216 | <-r.recalcTimer.C 217 | r.clearPendingRecalcFlag() 218 | r.calculate() 219 | } 220 | close(done) 221 | case f := <-action: 222 | f() 223 | } 224 | } 225 | } 226 | 227 | // Calculate unicast and broadcast routes from r.ourself, and reset 228 | // the broadcast route cache. 229 | func (r *routes) calculate() { 230 | r.peers.RLock() 231 | r.ourself.RLock() 232 | var ( 233 | unicast = r.calculateUnicast(true) 234 | unicastAll = r.calculateUnicast(false) 235 | broadcast = make(broadcastRoutes) 236 | broadcastAll = make(broadcastRoutes) 237 | ) 238 | broadcast[r.ourself.Name] = r.calculateBroadcast(r.ourself.Name, true) 239 | broadcastAll[r.ourself.Name] = r.calculateBroadcast(r.ourself.Name, false) 240 | r.ourself.RUnlock() 241 | r.peers.RUnlock() 242 | 243 | r.Lock() 244 | r.unicast = unicast 245 | r.unicastAll = unicastAll 246 | r.broadcast = broadcast 247 | r.broadcastAll = broadcastAll 248 | onChange := r.onChange 249 | r.Unlock() 250 | 251 | for _, callback := range onChange { 252 | callback() 253 | } 254 | } 255 | 256 | // Calculate all the routes for the question: if *we* want to send a 257 | // packet to Peer X, what is the next hop? 258 | // 259 | // When we sniff a packet, we determine the destination peer 260 | // ourself. 
Consequently, we can relay the packet via any 261 | // arbitrary peers - the intermediate peers do not have to have 262 | // any knowledge of the MAC address at all. Thus there's no need 263 | // to exchange knowledge of MAC addresses, nor any constraints on 264 | // the routes that we construct. 265 | func (r *routes) calculateUnicast(establishedAndSymmetric bool) unicastRoutes { 266 | _, unicast := r.ourself.routes(nil, establishedAndSymmetric) 267 | return unicast 268 | } 269 | 270 | // Calculate the route to answer the question: if we receive a 271 | // broadcast originally from Peer X, which peers should we pass the 272 | // frames on to? 273 | // 274 | // When the topology is stable, and thus all peers perform route 275 | // calculations based on the same data, the algorithm ensures that 276 | // broadcasts reach every peer exactly once. 277 | // 278 | // This is largely due to properties of the Peer.Routes algorithm. In 279 | // particular: 280 | // 281 | // ForAll X,Y,Z in Peers. 282 | // X.Routes(Y) <= X.Routes(Z) \/ 283 | // X.Routes(Z) <= X.Routes(Y) 284 | // ForAll X,Y,Z in Peers. 285 | // Y =/= Z /\ X.Routes(Y) <= X.Routes(Z) => 286 | // X.Routes(Y) u [P | Y.HasSymmetricConnectionTo(P)] <= X.Routes(Z) 287 | // where <= is the subset relationship on keys of the returned map. 
288 | func (r *routes) calculateBroadcast(name PeerName, establishedAndSymmetric bool) []PeerName { 289 | hops := []PeerName{} 290 | peer, found := r.peers.byName[name] 291 | if !found { 292 | return hops 293 | } 294 | if found, reached := peer.routes(r.ourself.Peer, establishedAndSymmetric); found { 295 | r.ourself.forEachConnectedPeer(establishedAndSymmetric, reached, 296 | func(remotePeer *Peer) { hops = append(hops, remotePeer.Name) }) 297 | } 298 | return hops 299 | } 300 | -------------------------------------------------------------------------------- /routes_test.go: -------------------------------------------------------------------------------- 1 | package mesh_test 2 | 3 | import "testing" 4 | 5 | func TestRoutesUnicast(t *testing.T) { 6 | t.Skip("TODO") 7 | } 8 | 9 | func TestRoutesUnicastAll(t *testing.T) { 10 | t.Skip("TODO") 11 | } 12 | 13 | func TestRoutesBroadcast(t *testing.T) { 14 | t.Skip("TODO") 15 | } 16 | 17 | func TestRoutesBroadcastAll(t *testing.T) { 18 | t.Skip("TODO") 19 | } 20 | 21 | func TestRoutesRecalculate(t *testing.T) { 22 | t.Skip("TODO") 23 | } 24 | -------------------------------------------------------------------------------- /status.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | ) 7 | 8 | // Status is our current state as a peer, as taken from a router. 9 | // This is designed to be used as diagnostic information. 
10 | type Status struct { 11 | Protocol string 12 | ProtocolMinVersion int 13 | ProtocolMaxVersion int 14 | Encryption bool 15 | PeerDiscovery bool 16 | Name string 17 | NickName string 18 | Port int 19 | Peers []PeerStatus 20 | UnicastRoutes []unicastRouteStatus 21 | BroadcastRoutes []broadcastRouteStatus 22 | Connections []LocalConnectionStatus 23 | TerminationCount int 24 | Targets []string 25 | OverlayDiagnostics interface{} 26 | TrustedSubnets []string 27 | } 28 | 29 | // NewStatus returns a Status object, taken as a snapshot from the router. 30 | func NewStatus(router *Router) *Status { 31 | return &Status{ 32 | Protocol: Protocol, 33 | ProtocolMinVersion: int(router.ProtocolMinVersion), 34 | ProtocolMaxVersion: ProtocolMaxVersion, 35 | Encryption: router.usingPassword(), 36 | PeerDiscovery: router.PeerDiscovery, 37 | Name: router.Ourself.Name.String(), 38 | NickName: router.Ourself.NickName, 39 | Port: router.Port, 40 | Peers: makePeerStatusSlice(router.Peers), 41 | UnicastRoutes: makeUnicastRouteStatusSlice(router.Routes), 42 | BroadcastRoutes: makeBroadcastRouteStatusSlice(router.Routes), 43 | Connections: makeLocalConnectionStatusSlice(router.ConnectionMaker), 44 | TerminationCount: router.ConnectionMaker.terminationCount, 45 | Targets: router.ConnectionMaker.Targets(false), 46 | OverlayDiagnostics: router.Overlay.Diagnostics(), 47 | TrustedSubnets: makeTrustedSubnetsSlice(router.TrustedSubnets), 48 | } 49 | } 50 | 51 | // PeerStatus is the current state of a peer in the mesh. 52 | type PeerStatus struct { 53 | Name string 54 | NickName string 55 | UID PeerUID 56 | ShortID PeerShortID 57 | Version uint64 58 | Connections []connectionStatus 59 | } 60 | 61 | // makePeerStatusSlice takes a snapshot of the state of peers. 
62 | func makePeerStatusSlice(peers *Peers) []PeerStatus { 63 | var slice []PeerStatus 64 | 65 | peers.forEach(func(peer *Peer) { 66 | var connections []connectionStatus 67 | if peer == peers.ourself.Peer { 68 | for conn := range peers.ourself.getConnections() { 69 | connections = append(connections, makeConnectionStatus(conn)) 70 | } 71 | } else { 72 | // Modifying peer.connections requires a write lock on 73 | // Peers, and since we are holding a read lock (due to the 74 | // ForEach), access without locking the peer is safe. 75 | for _, conn := range peer.connections { 76 | connections = append(connections, makeConnectionStatus(conn)) 77 | } 78 | } 79 | slice = append(slice, PeerStatus{ 80 | peer.Name.String(), 81 | peer.NickName, 82 | peer.UID, 83 | peer.ShortID, 84 | peer.Version, 85 | connections, 86 | }) 87 | }) 88 | 89 | return slice 90 | } 91 | 92 | type connectionStatus struct { 93 | Name string 94 | NickName string 95 | Address string 96 | Outbound bool 97 | Established bool 98 | } 99 | 100 | func makeConnectionStatus(c Connection) connectionStatus { 101 | return connectionStatus{ 102 | Name: c.Remote().Name.String(), 103 | NickName: c.Remote().NickName, 104 | Address: c.remoteTCPAddress(), 105 | Outbound: c.isOutbound(), 106 | Established: c.isEstablished(), 107 | } 108 | } 109 | 110 | // unicastRouteStatus is the current state of an established unicast route. 111 | type unicastRouteStatus struct { 112 | Dest, Via string 113 | } 114 | 115 | // makeUnicastRouteStatusSlice takes a snapshot of the unicast routes in routes. 116 | func makeUnicastRouteStatusSlice(r *routes) []unicastRouteStatus { 117 | r.RLock() 118 | defer r.RUnlock() 119 | 120 | var slice []unicastRouteStatus 121 | for dest, via := range r.unicast { 122 | slice = append(slice, unicastRouteStatus{dest.String(), via.String()}) 123 | } 124 | return slice 125 | } 126 | 127 | // BroadcastRouteStatus is the current state of an established broadcast route. 
128 | type broadcastRouteStatus struct { 129 | Source string 130 | Via []string 131 | } 132 | 133 | // makeBroadcastRouteStatusSlice takes a snapshot of the broadcast routes in routes. 134 | func makeBroadcastRouteStatusSlice(r *routes) []broadcastRouteStatus { 135 | r.RLock() 136 | defer r.RUnlock() 137 | 138 | var slice []broadcastRouteStatus 139 | for source, via := range r.broadcast { 140 | var hops []string 141 | for _, hop := range via { 142 | hops = append(hops, hop.String()) 143 | } 144 | slice = append(slice, broadcastRouteStatus{source.String(), hops}) 145 | } 146 | return slice 147 | } 148 | 149 | // LocalConnectionStatus is the current state of a physical connection to a peer. 150 | type LocalConnectionStatus struct { 151 | Address string 152 | Outbound bool 153 | State string 154 | Info string 155 | Attrs map[string]interface{} 156 | } 157 | 158 | // makeLocalConnectionStatusSlice takes a snapshot of the active local 159 | // connections in the ConnectionMaker. 160 | func makeLocalConnectionStatusSlice(cm *connectionMaker) []LocalConnectionStatus { 161 | resultChan := make(chan []LocalConnectionStatus) 162 | cm.actionChan <- func() bool { 163 | var slice []LocalConnectionStatus 164 | for conn := range cm.connections { 165 | state := "pending" 166 | if conn.isEstablished() { 167 | state = "established" 168 | } 169 | lc, _ := conn.(*LocalConnection) 170 | attrs := lc.OverlayConn.Attrs() 171 | name, ok := attrs["name"] 172 | if !ok { 173 | name = "none" 174 | } 175 | info := fmt.Sprintf("%-6v %v", name, conn.Remote()) 176 | if lc.router.usingPassword() { 177 | if lc.untrusted() { 178 | info = fmt.Sprintf("%-11v %v", "encrypted", info) 179 | if attrs != nil { 180 | attrs["encrypted"] = true 181 | } 182 | } else { 183 | info = fmt.Sprintf("%-11v %v", "unencrypted", info) 184 | } 185 | } 186 | slice = append(slice, LocalConnectionStatus{conn.remoteTCPAddress(), conn.isOutbound(), state, info, attrs}) 187 | } 188 | for address, target := range cm.targets { 
189 | add := func(state, info string) { 190 | slice = append(slice, LocalConnectionStatus{address, true, state, info, nil}) 191 | } 192 | switch target.state { 193 | case targetWaiting: 194 | until := "never" 195 | if !target.tryAfter.IsZero() { 196 | until = target.tryAfter.String() 197 | } 198 | if target.lastError == nil { // shouldn't happen 199 | add("waiting", "until: "+until) 200 | } else { 201 | add("failed", target.lastError.Error()+", retry: "+until) 202 | } 203 | case targetAttempting: 204 | if target.lastError == nil { 205 | add("connecting", "") 206 | } else { 207 | add("retrying", target.lastError.Error()) 208 | } 209 | case targetConnected: 210 | case targetSuspended: 211 | } 212 | } 213 | resultChan <- slice 214 | return false 215 | } 216 | return <-resultChan 217 | } 218 | 219 | // makeTrustedSubnetsSlice makes a human-readable copy of the trustedSubnets. 220 | func makeTrustedSubnetsSlice(trustedSubnets []*net.IPNet) []string { 221 | trustedSubnetStrs := []string{} 222 | for _, trustedSubnet := range trustedSubnets { 223 | trustedSubnetStrs = append(trustedSubnetStrs, trustedSubnet.String()) 224 | } 225 | return trustedSubnetStrs 226 | } 227 | -------------------------------------------------------------------------------- /surrogate_gossiper.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "bytes" 5 | "hash/fnv" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // surrogateGossiper ignores unicasts and relays broadcasts and gossips. 11 | type surrogateGossiper struct { 12 | sync.Mutex 13 | prevUpdates []prevUpdate 14 | router *Router 15 | } 16 | 17 | type prevUpdate struct { 18 | update []byte 19 | hash uint64 20 | t time.Time 21 | } 22 | 23 | var _ Gossiper = &surrogateGossiper{} 24 | 25 | // Hook to mock time for testing 26 | var now = func() time.Time { return time.Now() } 27 | 28 | // OnGossipUnicast implements Gossiper. 
29 | func (*surrogateGossiper) OnGossipUnicast(sender PeerName, msg []byte) error { 30 | return nil 31 | } 32 | 33 | // OnGossipBroadcast implements Gossiper. 34 | func (*surrogateGossiper) OnGossipBroadcast(_ PeerName, update []byte) (GossipData, error) { 35 | return newSurrogateGossipData(update), nil 36 | } 37 | 38 | // Gossip implements Gossiper. 39 | func (*surrogateGossiper) Gossip() GossipData { 40 | return nil 41 | } 42 | 43 | // OnGossip should return "everything new I've just learnt". 44 | // surrogateGossiper doesn't understand the content of messages, but it can eliminate simple duplicates 45 | func (s *surrogateGossiper) OnGossip(update []byte) (GossipData, error) { 46 | hash := fnv.New64a() 47 | _, _ = hash.Write(update) 48 | updateHash := hash.Sum64() 49 | s.Lock() 50 | defer s.Unlock() 51 | for _, p := range s.prevUpdates { 52 | if updateHash == p.hash && bytes.Equal(update, p.update) { 53 | return nil, nil 54 | } 55 | } 56 | // Delete anything that's older than the gossip interval, so we don't grow forever 57 | // (this time limit is arbitrary; surrogateGossiper should pass on new gossip immediately 58 | // so there should be no reason for a duplicate to show up after a long time) 59 | updateTime := now() 60 | gossipInterval := defaultGossipInterval 61 | if s.router != nil { 62 | gossipInterval = s.router.gossipInterval() 63 | } 64 | deleteBefore := updateTime.Add(-gossipInterval) 65 | keepFrom := len(s.prevUpdates) 66 | for i, p := range s.prevUpdates { 67 | if p.t.After(deleteBefore) { 68 | keepFrom = i 69 | break 70 | } 71 | } 72 | s.prevUpdates = append(s.prevUpdates[keepFrom:], prevUpdate{update, updateHash, updateTime}) 73 | return newSurrogateGossipData(update), nil 74 | } 75 | 76 | // surrogateGossipData is a simple in-memory GossipData. 
77 | type surrogateGossipData struct { 78 | messages [][]byte 79 | } 80 | 81 | var _ GossipData = &surrogateGossipData{} 82 | 83 | func newSurrogateGossipData(msg []byte) *surrogateGossipData { 84 | return &surrogateGossipData{messages: [][]byte{msg}} 85 | } 86 | 87 | // Encode implements GossipData. 88 | func (d *surrogateGossipData) Encode() [][]byte { 89 | return d.messages 90 | } 91 | 92 | // Merge implements GossipData. 93 | func (d *surrogateGossipData) Merge(other GossipData) GossipData { 94 | o := other.(*surrogateGossipData) 95 | messages := make([][]byte, 0, len(d.messages)+len(o.messages)) 96 | messages = append(messages, d.messages...) 97 | messages = append(messages, o.messages...) 98 | return &surrogateGossipData{messages: messages} 99 | } 100 | -------------------------------------------------------------------------------- /surrogate_gossiper_test.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import "testing" 4 | import "time" 5 | import "github.com/stretchr/testify/require" 6 | 7 | func TestSurrogateGossiperUnicast(t *testing.T) { 8 | t.Skip("TODO") 9 | } 10 | 11 | func TestSurrogateGossiperBroadcast(t *testing.T) { 12 | t.Skip("TODO") 13 | } 14 | 15 | func TestSurrogateGossiperGossip(t *testing.T) { 16 | t.Skip("TODO") 17 | } 18 | 19 | func checkOnGossip(t *testing.T, s Gossiper, input, expected []byte) { 20 | r, err := s.OnGossip(input) 21 | require.NoError(t, err) 22 | if r == nil { 23 | if expected == nil { 24 | return 25 | } 26 | require.Fail(t, "Gossip result should NOT be nil, but was") 27 | } 28 | require.Equal(t, [][]byte{expected}, r.Encode()) 29 | } 30 | 31 | func TestSurrogateGossiperOnGossip(t *testing.T) { 32 | myTime := time.Now() 33 | now = func() time.Time { return myTime } 34 | s := &surrogateGossiper{} 35 | msg := [][]byte{[]byte("test 1"), []byte("test 2"), []byte("test 3"), []byte("test 4")} 36 | checkOnGossip(t, s, msg[0], msg[0]) 37 | checkOnGossip(t, s, msg[1], 
msg[1]) 38 | checkOnGossip(t, s, msg[0], nil) 39 | checkOnGossip(t, s, msg[1], nil) 40 | myTime = myTime.Add(defaultGossipInterval / 2) // Should not trigger cleardown 41 | checkOnGossip(t, s, msg[2], msg[2]) // Only clears out old ones on new entry 42 | checkOnGossip(t, s, msg[0], nil) 43 | checkOnGossip(t, s, msg[1], nil) 44 | myTime = myTime.Add(defaultGossipInterval) 45 | checkOnGossip(t, s, msg[0], nil) 46 | checkOnGossip(t, s, msg[3], msg[3]) // Only clears out old ones on new entry 47 | checkOnGossip(t, s, msg[0], msg[0]) 48 | checkOnGossip(t, s, msg[0], nil) 49 | } 50 | 51 | func TestSurrogateGossipDataEncode(t *testing.T) { 52 | t.Skip("TODO") 53 | } 54 | 55 | func TestSurrogateGossipDataMerge(t *testing.T) { 56 | t.Skip("TODO") 57 | } 58 | -------------------------------------------------------------------------------- /token_bucket.go: -------------------------------------------------------------------------------- 1 | package mesh 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | // TokenBucket acts as a rate-limiter. 8 | // It is not safe for concurrent use by multiple goroutines. 9 | type tokenBucket struct { 10 | capacity int64 // Maximum capacity of bucket 11 | tokenInterval time.Duration // Token replenishment rate 12 | refillDuration time.Duration // Time to refill from empty 13 | earliestUnspentToken time.Time 14 | } 15 | 16 | // newTokenBucket returns a bucket containing capacity tokens, refilled at a 17 | // rate of one token per tokenInterval. 18 | func newTokenBucket(capacity int64, tokenInterval time.Duration) *tokenBucket { 19 | tb := tokenBucket{ 20 | capacity: capacity, 21 | tokenInterval: tokenInterval, 22 | refillDuration: tokenInterval * time.Duration(capacity)} 23 | 24 | tb.earliestUnspentToken = tb.capacityToken() 25 | 26 | return &tb 27 | } 28 | 29 | // Blocks until there is a token available. 30 | // Not safe for concurrent use by multiple goroutines. 
31 | func (tb *tokenBucket) wait() { 32 | // If earliest unspent token is in the future, sleep until then 33 | time.Sleep(time.Until(tb.earliestUnspentToken)) 34 | 35 | // Alternatively, enforce bucket capacity if necessary 36 | capacityToken := tb.capacityToken() 37 | if tb.earliestUnspentToken.Before(capacityToken) { 38 | tb.earliestUnspentToken = capacityToken 39 | } 40 | 41 | // 'Remove' a token from the bucket 42 | tb.earliestUnspentToken = tb.earliestUnspentToken.Add(tb.tokenInterval) 43 | } 44 | 45 | // Determine the historic token timestamp representing a full bucket 46 | func (tb *tokenBucket) capacityToken() time.Time { 47 | return time.Now().Add(-tb.refillDuration).Truncate(tb.tokenInterval) 48 | } 49 | -------------------------------------------------------------------------------- /token_bucket_test.go: -------------------------------------------------------------------------------- 1 | package mesh_test 2 | 3 | import "testing" 4 | 5 | func TestTokenBucket(t *testing.T) { 6 | t.Skip("TODO") 7 | } 8 | --------------------------------------------------------------------------------