├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── doc ├── README.md ├── edoc-info ├── erlang.png ├── overview.edoc ├── stylesheet.css ├── swim.md ├── swim_broadcasts.md ├── swim_membership.md ├── swim_messages.md ├── swim_subscriptions.md └── swim_transport.md ├── rebar.config ├── rebar.lock ├── src ├── swim.app.src ├── swim.erl ├── swim_app.erl ├── swim_awareness.erl ├── swim_broadcasts.erl ├── swim_failure.erl ├── swim_keyring.erl ├── swim_membership.erl ├── swim_messages.erl ├── swim_metrics.erl ├── swim_pushpull.erl ├── swim_pushpull_sup.erl ├── swim_socket.erl ├── swim_state.erl ├── swim_subscriptions.erl ├── swim_sup.erl └── swim_time.erl └── test ├── property_test ├── prop_swim_broadcasts.erl ├── prop_swim_keyring.erl ├── prop_swim_membership.erl └── prop_swim_messages.erl ├── swim_SUITE.erl ├── swim_failure_SUITE.erl ├── swim_generators.erl └── swim_test_client.erl /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | ttb_last_config 3 | log/ 4 | *~ 5 | \#*\# 6 | .\#* 7 | *.beam 8 | TAGS 9 | .DS_Store 10 | .rebar3 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: erlang 2 | otp_release: 3 | - 20 4 | - 19 5 | script: make test 6 | cache: 7 | directories: 8 | - ~/.cache/rebar3/ 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REBAR ?= $(shell which rebar3) 2 | 3 | .PHONY: test 4 | 5 | compile: 6 | $(REBAR) do xref, dialyzer 7 | 8 | test: 9 | $(REBAR) ct 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SWIM - An Awesome Weakly-consistent Infection-style Gossip Protocol # 2 | 3 | Copyright (c) 2015-2018 Tucker Barbour 4 | 5 | __Authors:__ Tucker Barbour ([`tucker.barbour@gmail.com`](mailto:tucker.barbour@gmail.com)). 6 | 7 | __References__* http://www.cs.cornell.edu/~asdas/research/dsn02-SWIM.pdf 8 | 9 | (__WARNING:__ This project is untested in production environments. Do not use in production.) 10 | 11 | ### Intro 12 | 13 | This Application is an Erlang implementation of the 14 | Scalable Weakly-consistent Infection-style Process Group 15 | Membership Protocol (SWIM). 
As the title implies, SWIM provides 16 | weakly-consistent knowledge of process group membership information to all 17 | participating processes. However, the [*Scalable* part of the title should read: 18 | *Awesome!*](http://erlangcentral.org/scalable-is-awesome-literally-garrett-smith-erlang-user-conference-2015/#.VZWtcXjEo22) 19 | So let's be more specific about what Awesome features SWIM provides: 20 | 21 | - Constant message load (bandwidth) per member regardless of the number 22 | of members in the group 23 | - Constant time to first-detection of a faulty process regardless of 24 | the number of members in the group 25 | - Low false-positive failure detection rate 26 | 27 | ### Project Status 28 | 29 | This project is still under active development and as such the API may change without warning. 30 | 31 | ### Use Cases 32 | 33 | What can we use SWIM for? 34 | 35 | - Reliable multicast 36 | - Epidemic-style information dissemination 37 | - Pub-sub 38 | - Generic peer-to-peer systems 39 | 40 | Really anything that requires each process in a group to maintain a local list 41 | of other non-faulty processes in the group and be notified when members join or 42 | leave, either voluntarily or through failure. 43 | 44 | ### Why? 45 | 46 | Other distributed membership algorithms traditionally use a heartbeating technique. 47 | The heartbeat technique calls for each process in the group to periodically 48 | send an incrementing heartbeat counter to all other processes in the group as well 49 | as respond to incoming heartbeats from other processes. A process is detected as 50 | faulty when a heartbeat response is not received from a process in some 51 | period of time. Heartbeat implementations often suffer from scalability limitations 52 | as the size of the process group grows.
Some popular heartbeat architectures 53 | along with potential weaknesses: 54 | 55 | * Centralized - leads to hot-spots and single-point-of-failure 56 | * All-to-All - leads to message load on the network that grows quadratically with the group size 57 | * Logical Ring - unpredictability of failure detection time 58 | 59 | SWIM addresses the problems with traditional heartbeat implementations through a 60 | peer-to-peer randomized probing protocol. I recommend reading the SWIM paper 61 | for more details. 62 | 63 | ### Why Not? 64 | 65 | SWIM provides weak-consistency guarantees of group membership. 66 | If our domain requires stronger consistency of membership awareness then we 67 | should look elsewhere: 68 | 69 | - [Zookeeper](https://zookeeper.apache.org) 70 | - [Paxos](http://research.microsoft.com/en-us/um/people/lamport/pubs/paxos-simple.pdf) 71 | - [Raft](https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro) 72 | - [Riak Ensemble](https://github.com/basho/riak_ensemble) 73 | 74 | What if we want more than just membership awareness and fault detection? Say 75 | we want application-level sharding like a consistent-hash ring? 76 | SWIM and this implementation only provide weakly-consistent membership awareness. 77 | You can use SWIM as the underlying gossip protocol to disseminate 78 | ring updates to the group -- but that's up to you and your application. You may 79 | be better off taking a look at other, more specific, implementations like: 80 | 81 | - [Ringpop](https://github.com/uber/ringpop) 82 | - [Plumtree](https://github.com/helium/plumtree) 83 | 84 | What if the information we need to disseminate to the group is large, on 85 | the order of MiB and GiB? This implementation of SWIM uses UDP for 86 | transport and thus has an upper limit on the size of information we can 87 | reliably send per message.
Again, we can use SWIM for membership awareness and 88 | write our application logic using TCP to transmit our large data between members. 89 | It might also be worth taking a look at alternative implementations that have 90 | modified the protocol to support both UDP and TCP: 91 | 92 | - [Memberlist](https://github.com/hashicorp/memberlist) 93 | 94 | ### Lifeguard 95 | 96 | We've also included some of the improvements outlined in the [Lifeguard](https://arxiv.org/abs/1707.00788) paper from Hashicorp. You can also find more information about their research on [their website](https://www.hashicorp.com/blog/making-gossip-more-robust-with-lifeguard). On a local 5 node cluster, we have observed a reduction in false positive rates during the threshold experiment. More details of the results will be provided when we have time to conduct a more scientific experiment with this implementation. 97 | 98 | ### Build 99 | 100 | We require OTP-19.x and an OpenSSL that supports AES-GCM. The default on OSX 101 | does not include support for AES-GCM, so it's recommended you use `homebrew` to 102 | install a newer version of OpenSSL and compile OTP linking to the OpenSSL managed 103 | by `homebrew`. Include `--with-ssl=/usr/local/opt/openssl` when compiling OTP. 104 | 105 | 106 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # SWIM - An Awesome Weakly-consistent Infection-style Gossip Protocol # 4 | 5 | Copyright (c) 2015 Tucker Barbour 6 | 7 | __Version:__ Feb 18 2016 10:13:03 8 | 9 | __Authors:__ Tucker Barbour ([`barbct5@gmail.com`](mailto:barbct5@gmail.com)). 10 | 11 | __References__* http://www.cs.cornell.edu/~asdas/research/dsn02-SWIM.pdf 12 | 13 | [![Build Status](https://travis-ci.org/barbct5/swim.svg)](https://travis-ci.org/barbct5/swim) 14 | 15 | (__WARNING:__ This project is untested in production environments.
Do not use in production.) 16 | 17 | ### Intro 18 | 19 | This Application is an Erlang implementation of the 20 | Scalable Weakly-consistent Infection-style Process Group 21 | Membership Protocol (SWIM). As the title implies, SWIM provides 22 | weakly-consistent knowledge of process group membership information to all 23 | participating processes. However, the [*Scalable* part of the title should read: 24 | *Awesome!*](http://erlangcentral.org/scalable-is-awesome-literally-garrett-smith-erlang-user-conference-2015/#.VZWtcXjEo22) 25 | So let's be more specific about what Awesome features SWIM provides: 26 | 27 | - Constant message load (bandwidth) per member regardless of the number 28 | of members in the group 29 | - Constant time to first-detection of a faulty process regardless of 30 | the number of members in the group 31 | - Low false-positive failure detection rate 32 | 33 | ### Use Cases 34 | 35 | What can we use SWIM for? 36 | 37 | - Reliable multicast 38 | - Epidemic-style information dissemination 39 | - Pub-sub 40 | - Generic peer-to-peer systems 41 | 42 | Really anything that requires each process in a group to maintain a local list 43 | of other non-faulty processes in the group and be notified when members join or 44 | leave, either voluntarily or through failure. 45 | 46 | ### Why? 47 | 48 | Other distributed membership algorithms tradionally use a heartbeating technique. 49 | The heartbeat technique calls for each process in the group to periodically 50 | send an incrementing heartbeat counter to all other processes in the group as well 51 | as respond to incoming heartbeats from other process. A process is detected as 52 | faulty when a heartbeat response is not received from a process in some 53 | period of time. Heartbeat implementations often suffer from scalability limitiations 54 | as the size of the process group grows. 
Some popular heartbeat architectures 55 | along with potential weaknesses: 56 | 57 | * Centralized - leads to hot-spots and single-point-of-failure 58 | * All-to-All - leads to message load on the network that grows quadratically with the group size 59 | * Logical Ring - unpredicability of failure detection time 60 | 61 | SWIM addresses the problems with tradional heartbeat implementations through a 62 | peer-to-peer randomized probing protocol. I recommend reading the SWIM paper 63 | for more details. 64 | 65 | ### Why Not? 66 | 67 | SWIM provides weak-consistency guarentees of group membership. 68 | If our domain requires stronger consistency of membership awareness then we 69 | should look elsewhere: 70 | 71 | - [Zookeeper](https://zookeeper.apache.org) 72 | - [Paxos](http://research.microsoft.com/en-us/um/people/lamport/pubs/paxos-simple.pdf) 73 | - [Raft](https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro) 74 | - [Riak Ensemble](https://github.com/basho/riak_ensemble) 75 | 76 | What if we want more than just membership awareness and fault detection? Say 77 | we want application-level sharding like a consistent-hash ring? 78 | SWIM and this implemention only provide weakly-consistent membership awareness. 79 | You can use SWIM as the underlying gossip protocol to disseminate 80 | ring updates to the group -- but that's up to you and your application. You may 81 | be better off taking a look at other, more specific, implementions like: 82 | 83 | - [Ringpop](https://github.com/uber/ringpop) 84 | - [Plumtree](https://github.com/helium/plumtree) 85 | 86 | What if the information we need to disseminate to the group is large, on 87 | the order of MiB and GiB? This implementation of SWIM uses UDP for 88 | transport and thus has an upper limit on the size of information we can 89 | reliably send per message. 
Again, we can use SWIM for membership awareness and 90 | write our application logic using TCP to transmit our large data between members. 91 | It might also be worth taking a look at alternative implementations that have 92 | modified the protocol to support both UDP and TCP: 93 | 94 | - [Memberlist](https://github.com/hashicorp/memberlist) 95 | 96 | ### Details 97 | 98 | If you made it this far and are still interested, you should read the module 99 | documentation which includes details about the implementation. 100 | The pieces of the SWIM protocol are broken down as follows: 101 | 102 | * __*Failure Detection*__ - [`swim`](swim.md) 103 | * __*Membership*__ - [`swim_membership`](swim_membership.md) 104 | * __*Dissemination*__ - [`swim_broadcasts`](swim_broadcasts.md) 105 | 106 | ### How To 107 | 108 | Here is a quick reference for using SWIM in your application. These examples 109 | assume the `crypto` application is already started. We also assume encryption 110 | keys have already been distributed -- that`s outside the scope of SWIM. 111 | 112 | ```erlang 113 | 114 | % On our first node, let us start the seed listening at 192.168.2.10:5000 115 | {ok, Lan} = swim:start_link(lan, {{192,168,2,10}, 5000}, Keys, []). 116 | 117 | % On a different node, let us join the group lan using 192.168.2.10:5000 118 | % as the seed. 119 | Seeds = [{{192,168,2,10}, 5000}]. 120 | {ok, Lan} = swim:start_link(lan, {{192,168,2,11}, 5000}, Keys, [{seeds, Seeds}]). 121 | 122 | % Let us check who else is in the group 123 | swim:members(lan). 124 | 125 | % By default the parent process will receive messages when group membership 126 | % changes. User-provided messages, send via swim:publish/2, are sent to the 127 | % parent process as well. 
You can match against the different types of messages 128 | % and membership changes as see below: 129 | 130 | receive 131 | {swim, {membership, {alive, Member, _Incarnation}}} -> 132 | ok = error_logger:info_msg("Member is alive: ~p", [Member]); 133 | {swim, {membership, {faulty, Member, _Incarnation}}} -> 134 | ok = error_logger:info_msg("Member is dead: ~p", [Member]); 135 | {swim, {user, Msg}} -> 136 | ok = error_logger:info_msg("Received user message: ~p", [Msg]) 137 | after 138 | 5000 -> 139 | timeout 140 | end. 141 | 142 | % Other non-parent processes can subscribe to receive messages about group 143 | % membership changes as well as user-provided messages. 144 | % Let us subscribe to these events: 145 | 146 | swim:subscribe(lan). 147 | 148 | ``` 149 | 150 | ### Build 151 | 152 | We require OTP-18.x and an OpenSSL that supports AES-GCM. The default on OSX 153 | does not include support for AES-GCM, so it's recommended you use `homebrew` to 154 | install a newer version of OpenSSL and compile OTP linking to the OpenSSL managed 155 | by `homebrew`. Include `--with-ssl=/usr/local/opt/openssl` when compiling OTP. 156 | 157 | We use `rebar3`, included in the source of this repo, to build and test `swim`. 158 | 159 | ``` 160 | 161 | ./rebar3 do xref, dialyzer, eunit 162 | 163 | ``` 164 | 165 | ## Modules ## 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 |
swim
swim_broadcasts
swim_membership
swim_messages
174 | 175 | 176 | -------------------------------------------------------------------------------- /doc/edoc-info: -------------------------------------------------------------------------------- 1 | %% encoding: UTF-8 2 | {application,swim}. 3 | {modules,[swim,swim_broadcasts,swim_membership,swim_messages, 4 | swim_subscriptions,swim_transport]}. 5 | -------------------------------------------------------------------------------- /doc/erlang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ctbarbour/swim/ca3fe4f45408c4e95e6b8e00662e443ba710737c/doc/erlang.png -------------------------------------------------------------------------------- /doc/overview.edoc: -------------------------------------------------------------------------------- 1 | @author Tucker Barbour 2 | @copyright 2015-2018 Tucker Barbour 3 | @version {@version} 4 | @title SWIM - An Awesome Weakly-consistent Infection-style Gossip Protocol 5 | @reference http://www.cs.cornell.edu/~asdas/research/dsn02-SWIM.pdf 6 | @doc 7 | 8 | [![Build Status](https://travis-ci.org/barbct5/swim.svg)](https://travis-ci.org/barbct5/swim) 9 | 10 | 11 | (__WARNING:__ This project is untested in production environments. Do not use in production.) 12 | 13 | 14 | ### Intro 15 | 16 | This Application is an Erlang implementation of the 17 | Scalable Weakly-consistent Infection-style Process Group 18 | Membership Protocol (SWIM). As the title implies, SWIM provides 19 | weakly-consistent knowledge of process group membership information to all 20 | participating processes. 
However, the [*Scalable* part of the title should read: 21 | *Awesome!*](http://erlangcentral.org/scalable-is-awesome-literally-garrett-smith-erlang-user-conference-2015/#.VZWtcXjEo22) 22 | So let's be more specific about what Awesome features SWIM provides: 23 | 24 | - Constant message load (bandwidth) per member regardless of the number 25 | of members in the group 26 | - Constant time to first-detection of a faulty process regardless of 27 | the number of members in the group 28 | - Low false-positive failure detection rate 29 | 30 | ### Use Cases 31 | 32 | What can we use SWIM for? 33 | 34 | - Reliable multicast 35 | - Epidemic-style information dissemination 36 | - Pub-sub 37 | - Generic peer-to-peer systems 38 | 39 | Really anything that requires each process in a group to maintain a local list 40 | of other non-faulty processes in the group and be notified when members join or 41 | leave, either voluntarily or through failure. 42 | 43 | ### Why? 44 | 45 | Other distributed membership algorithms traditionally use a heartbeating technique. 46 | The heartbeat technique calls for each process in the group to periodically 47 | send an incrementing heartbeat counter to all other processes in the group as well 48 | as respond to incoming heartbeats from other processes. A process is detected as 49 | faulty when a heartbeat response is not received from a process in some 50 | period of time. Heartbeat implementations often suffer from scalability limitations 51 | as the size of the process group grows. Some popular heartbeat architectures 52 | along with potential weaknesses: 53 | 54 | * Centralized - leads to hot-spots and single-point-of-failure 55 | * All-to-All - leads to message load on the network that grows quadratically with the group size 56 | * Logical Ring - unpredictability of failure detection time 57 | 58 | SWIM addresses the problems with traditional heartbeat implementations through a 59 | peer-to-peer randomized probing protocol.
I recommend reading the SWIM paper 60 | for more details. 61 | 62 | ### Why Not? 63 | 64 | SWIM provides weak-consistency guarantees of group membership. 65 | If our domain requires stronger consistency of membership awareness then we 66 | should look elsewhere: 67 | 68 | - [Zookeeper](https://zookeeper.apache.org) 69 | - [Paxos](http://research.microsoft.com/en-us/um/people/lamport/pubs/paxos-simple.pdf) 70 | - [Raft](https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro) 71 | - [Riak Ensemble](https://github.com/basho/riak_ensemble) 72 | 73 | What if we want more than just membership awareness and fault detection? Say 74 | we want application-level sharding like a consistent-hash ring? 75 | SWIM and this implementation only provide weakly-consistent membership awareness. 76 | You can use SWIM as the underlying gossip protocol to disseminate 77 | ring updates to the group -- but that's up to you and your application. You may 78 | be better off taking a look at other, more specific, implementations like: 79 | 80 | - [Ringpop](https://github.com/uber/ringpop) 81 | - [Plumtree](https://github.com/helium/plumtree) 82 | 83 | What if the information we need to disseminate to the group is large, on 84 | the order of MiB and GiB? This implementation of SWIM uses UDP for 85 | transport and thus has an upper limit on the size of information we can 86 | reliably send per message. Again, we can use SWIM for membership awareness and 87 | write our application logic using TCP to transmit our large data between members. 88 | It might also be worth taking a look at alternative implementations that have 89 | modified the protocol to support both UDP and TCP: 90 | 91 | - [Memberlist](https://github.com/hashicorp/memberlist) 92 | 93 | ### Details 94 | 95 | If you made it this far and are still interested, you should read the module 96 | documentation which includes details about the implementation.
97 | The pieces of the SWIM protocol are broken down as follows: 98 | 99 | * __*Failure Detection*__ - {@link swim} 100 | * __*Membership*__ - {@link swim_membership} 101 | * __*Dissemination*__ - {@link swim_broadcasts} 102 | 103 | ### How To 104 | 105 | Here is a quick reference for using SWIM in your application. These examples 106 | assume the `crypto` application is already started. We also assume encryption 107 | keys have already been distributed -- that's outside the scope of SWIM. 108 | 109 |
110 | % On our first node, let us start the seed listening at 192.168.2.10:5000
111 | {ok, Lan} = swim:start_link(lan, {{192,168,2,10}, 5000}, Keys, []).
112 | 
113 | % On a different node, let us join the group `lan' using 192.168.2.10:5000
114 | % as the seed.
115 | Seeds = [{{192,168,2,10}, 5000}].
116 | {ok, Lan} = swim:start_link(lan, {{192,168,2,11}, 5000}, Keys, [{seeds, Seeds}]).
117 | 
118 | % Let us check who else is in the group
119 | swim:members(lan).
120 | 
121 | % By default the parent process will receive messages when group membership
122 | % changes. User-provided messages, sent via `swim:publish/2', are sent to the
123 | % parent process as well. You can match against the different types of messages
124 | % and membership changes as seen below:
125 | 
126 | receive
127 |     {swim, {membership, {alive, Member, _Incarnation}}} ->
128 |         ok = error_logger:info_msg("Member is alive: ~p", [Member]);
129 |     {swim, {membership, {faulty, Member, _Incarnation}}} ->
130 |         ok = error_logger:info_msg("Member is dead: ~p", [Member]);
131 |     {swim, {user, Msg}} ->
132 |         ok = error_logger:info_msg("Received user message: ~p", [Msg])
133 | after
134 |     5000 ->
135 |         timeout
136 | end.
137 | 
138 | % Other non-parent processes can subscribe to receive messages about group
139 | % membership changes as well as user-provided messages.
140 | % Let us subscribe to these events:
141 | 
142 | swim:subscribe(lan).
143 | 
144 | 145 | ### Build 146 | 147 | We require OTP-18.x and an OpenSSL that supports AES-GCM. The default on OSX 148 | does not include support for AES-GCM, so it's recommended you use `homebrew' to 149 | install a newer version of OpenSSL and compile OTP linking to the OpenSSL managed 150 | by `homebrew'. Include `--with-ssl=/usr/local/opt/openssl' when compiling OTP. 151 | 152 | We use `rebar3', included in the source of this repo, to build and test `swim'. 153 | 154 |
155 | ./rebar3 do xref, dialyzer, eunit
156 | 
157 | 158 | ## Modules ## 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 |
swim
swim_broadcasts
swim_membership
swim_messages
167 | 168 | @end 169 | -------------------------------------------------------------------------------- /doc/stylesheet.css: -------------------------------------------------------------------------------- 1 | /* standard EDoc style sheet */ 2 | body { 3 | font-family: Verdana, Arial, Helvetica, sans-serif; 4 | margin-left: .25in; 5 | margin-right: .2in; 6 | margin-top: 0.2in; 7 | margin-bottom: 0.2in; 8 | color: #000000; 9 | background-color: #ffffff; 10 | } 11 | h1,h2 { 12 | margin-left: -0.2in; 13 | } 14 | div.navbar { 15 | background-color: #add8e6; 16 | padding: 0.2em; 17 | } 18 | h2.indextitle { 19 | padding: 0.4em; 20 | background-color: #add8e6; 21 | } 22 | h3.function,h3.typedecl { 23 | background-color: #add8e6; 24 | padding-left: 1em; 25 | } 26 | div.spec { 27 | margin-left: 2em; 28 | background-color: #eeeeee; 29 | } 30 | a.module { 31 | text-decoration:none 32 | } 33 | a.module:hover { 34 | background-color: #eeeeee; 35 | } 36 | ul.definitions { 37 | list-style-type: none; 38 | } 39 | ul.index { 40 | list-style-type: none; 41 | background-color: #eeeeee; 42 | } 43 | 44 | /* 45 | * Minor style tweaks 46 | */ 47 | ul { 48 | list-style-type: square; 49 | } 50 | table { 51 | border-collapse: collapse; 52 | } 53 | td { 54 | padding: 3 55 | } 56 | -------------------------------------------------------------------------------- /doc/swim.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Module swim # 4 | * [Data Types](#types) 5 | * [Function Index](#index) 6 | * [Function Details](#functions) 7 | 8 | 9 | 10 | ## Data Types ## 11 | 12 | 13 | 14 | 15 | ### swim_opt() ### 16 | 17 | 18 |

19 | swim_opt() = {protocol_period, pos_integer()} | {ack_proxies, pos_integer()} | {ack_timeout, pos_integer()}
20 | 
21 | 22 | 23 | 24 | ## Function Index ## 25 | 26 | 27 |
child_spec/4
local_member/1
members/1
publish/2
rotate_keys/2
start_link/3
start_link/4
stop/1
subscribe/1
28 | 29 | 30 | 31 | 32 | ## Function Details ## 33 | 34 | 35 | 36 | ### child_spec/4 ### 37 | 38 | `child_spec(Name, LocalMember, Keys, Opts) -> any()` 39 | 40 | 41 | 42 | ### local_member/1 ### 43 | 44 | `local_member(Pid) -> any()` 45 | 46 | 47 | 48 | ### members/1 ### 49 | 50 | `members(Pid) -> any()` 51 | 52 | 53 | 54 | ### publish/2 ### 55 | 56 | `publish(Pid, Event) -> any()` 57 | 58 | 59 | 60 | ### rotate_keys/2 ### 61 | 62 | `rotate_keys(Pid, NewKey) -> any()` 63 | 64 | 65 | 66 | ### start_link/3 ### 67 | 68 |

69 | start_link(LocalMember::member(), Keys::[key()], Opts::[swim_opt() | swim_membership:swim_membership_opt()]) -> {ok, pid()}
70 | 
71 |
72 | 73 | 74 | 75 | ### start_link/4 ### 76 | 77 |

78 | start_link(Name::atom(), LocalMember::member(), Keys::[key()], Opts::[swim_opt() | swim_membership:swim_membership_opt()]) -> {ok, pid()}
79 | 
80 |
81 | 82 | 83 | 84 | ### stop/1 ### 85 | 86 | `stop(Pid) -> any()` 87 | 88 | 89 | 90 | ### subscribe/1 ### 91 | 92 | `subscribe(Pid) -> any()` 93 | 94 | -------------------------------------------------------------------------------- /doc/swim_broadcasts.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Module swim_broadcasts # 4 | * [Description](#description) 5 | * [Function Index](#index) 6 | * [Function Details](#functions) 7 | 8 | This module is responsible for maintaining membership updates and user 9 | provided events along with their state as a part of the 10 | infection-style dissemination component of the SWIM protocol. 11 | 12 | Copyright (c) 2015 13 | 14 | __Version:__ Feb 18 2016 10:13:03 15 | 16 | 17 | 18 | ## Description ## 19 | 20 | ### Infection-Style Dissemination 21 | As an alternative to IP Multicast 22 | or a point-to-point messaging scheme the SWIM protocol 23 | disseminates membership updates by piggybacking on messages sent 24 | as a part of the failure detection protocol. Thus, implementation 25 | does not generate any extra packets to send membership updates. 26 | 27 | Here, `swim_broadcasts` maintains the buffer of recent membership 28 | events along with a count for each event. The local count 29 | specifies the number of times the event has been piggybacked so 30 | far by this member and is used to choose which events to piggyback 31 | next. Each event is piggybacked at most `Retransmit * log(N + 32 | 1)` times, where `Retransmit` is a configurable parameter. 33 | If the size of events in the buffer is larger than the maximum number of 34 | events that can be piggybacked on a single PING or ACK, events that have 35 | been gossiped fewer times are preferred. This is needed as the 36 | protocol period is fixed and the rate of membership changes might 37 | temporarily overwhelm the speed of dissemination. 
Preferring 38 | "younger" events under such circumstances ensures that all 39 | membership changes infect at least a few members - when the 40 | membership change rate quiesces, older events will 41 | propagate through the rest of the gossip group. Membership events are always 42 | preferred over user-provided events. 43 | 44 | 45 | ## Function Index ## 46 | 47 | 48 |
dequeue/2
dequeue/3Dequeues a set of encoded events ready to be broadcast to other members 49 | in the group.
handle_info/2
max_transmissions/2Calculates the maximum number of times an event should be broadcast.
membership/2Queues a membership event to be broadcast to other members in the group.
user/2Queues a user event to be broadcast to other members in the group.
50 | 51 | 52 | 53 | 54 | ## Function Details ## 55 | 56 | 57 | 58 | ### dequeue/2 ### 59 | 60 |

 61 | dequeue(EventMgrPid::pid(), NumMembers::pos_integer()) -> binary()
 62 | 
63 |
64 | 65 | 66 | 67 | ### dequeue/3 ### 68 | 69 |

 70 | dequeue(EventMgrPid::pid() | module(), NumMembers::pos_integer(), MaxSize::pos_integer()) -> binary()
 71 | 
72 |
73 | 74 | Dequeues a set of encoded events ready to be broadcast to other members 75 | in the group 76 | 77 | Events to be broadcast are determined by the number of the peers as well as 78 | the size limitation provided by `MaxSize`. Membership events always take 79 | precedence over user events. Events are broadcast up to a maximum 80 | determined by [`max_transmissions/2`](#max_transmissions-2). If the number of events 81 | exceeds the maximum number of events allowable under `MaxSize`, events that have 82 | been broadcast fewer times are preferred. This is needed as the rate of 83 | incoming events, i.e. membership changes, might temporarily overwhelm 84 | the speed of dissemination. 85 | Preferring younger events ensures that all events 86 | infect at least a few members. Events that have exceeded 87 | their retransmit limit are removed from the broadcasts. Events that are 88 | returned have their number of retransmissions incremented by 1. 89 | 90 | 91 | 92 | ### handle_info/2 ### 93 | 94 | `handle_info(Info, State) -> any()` 95 | 96 | 97 | 98 | ### max_transmissions/2 ### 99 | 100 |

101 | max_transmissions(NumMembers::pos_integer(), RetransmitFactor::pos_integer()) -> pos_integer()
102 | 
103 |
104 | 105 | Calculates the maximum number of times an event should be broadcast. 106 | 107 | 108 | 109 | ### membership/2 ### 110 | 111 |

112 | membership(EventMgrPid::pid(), Event::{member_status(), member(), incarnation()}) -> ok
113 | 
114 |
115 | 116 | Queues a membership event to be broadcast to other members in the group 117 | 118 | 119 | 120 | ### user/2 ### 121 | 122 |

123 | user(EventMgrPid::pid(), Event::term()) -> ok
124 | 
125 |
126 | 127 | Queues a user event to be broadcast to other members in the group 128 | 129 | -------------------------------------------------------------------------------- /doc/swim_membership.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Module swim_membership # 4 | * [Description](#description) 5 | * [Data Types](#types) 6 | * [Function Index](#index) 7 | * [Function Details](#functions) 8 | 9 | This module is responsible for maintaining the list and status of non 10 | faulty members in a gossip group through the use of the Suspicion Mechanism 11 | described in the SWIM Paper. 12 | 13 | Copyright (c) 2015 14 | 15 | __Version:__ Feb 18 2016 10:13:03 16 | 17 | 18 | 19 | ## Description ## 20 | A `swim_membership` process becomes aware of 21 | membership changes through exported functions defined for a specific member 22 | status, [`alive/3`](#alive-3), [`suspect/3`](#suspect-3), [`faulty/3`](#faulty-3), as determined 23 | by the Failure Detection mechanism of SWIM implemented in [`swim`](swim.md). Member 24 | state includes the locally known status of a member as well as a logical clock 25 | for the member's status known in the SWIM paper as the incarnation. 26 | When the status of a member changes events are sent to [`swim_broadcasts`](swim_broadcasts.md) 27 | to be broadcast to the rest of the members in the gossip group. 28 | 29 | 30 | 31 | ## Data Types ## 32 | 33 | 34 | 35 | 36 | ### swim_membership_opt() ### 37 | 38 | 39 |

 40 | swim_membership_opt() = {seeds, [member()]} | {suspicion_factor, pos_integer()} | {protocol_period, pos_integer()}
 41 | 
42 | 43 | 44 | 45 | ## Function Index ## 46 | 47 | 48 |
alive/3Set the member status to alive.
faulty/3Remove the member from the group.
local_member/1The identifier for the local member.
members/1A list of known members and their status.
num_members/1The number of known members in the gossip group, including the local member.
opts/1
set_status/3Forcibly set the status of a member.
start_link/3
suspect/3Set the member status to suspect.
49 | 50 | 51 | 52 | 53 | ## Function Details ## 54 | 55 | 56 | 57 | ### alive/3 ### 58 | 59 |

 60 | alive(Pid::pid(), Member::member(), Incarnation::incarnation()) -> [{member_status(), member(), incarnation()}]
 61 | 
62 |
63 | 64 | Set the member status to alive 65 | 66 | If the member isn't known it's added to the membership and an event is 67 | broadcast to the group. If the member is known and the incarnation is 68 | greater than the current incarnation of the member, we update the incarnation 69 | of member and broadcast an event to group. Otherwise, we do nothing. 70 | 71 | 72 | 73 | ### faulty/3 ### 74 | 75 |

 76 | faulty(Pid::pid(), Member::member(), Incarnation::incarnation()) -> [{member_status(), member(), incarnation()}]
 77 | 
78 |
79 | 80 | Remove the member from the group 81 | 82 | If the member isn't already known we do nothing. If the member is known 83 | we remove the member and broadcast the change if the provided incarnation is 84 | greater than the current incarnation of the member. 85 | 86 | 87 | 88 | ### local_member/1 ### 89 | 90 |

 91 | local_member(Pid::pid()) -> member()
 92 | 
93 |
94 | 95 | The identifier for the local member 96 | 97 | 98 | 99 | ### members/1 ### 100 | 101 |

102 | members(Pid::pid()) -> [{member_status(), member(), incarnation()}]
103 | 
104 |
105 | 106 | A list of known members and their status 107 | 108 | 109 | 110 | ### num_members/1 ### 111 | 112 |

113 | num_members(Pid::pid()) -> pos_integer()
114 | 
115 |
116 | 117 | The number of known members in the gossip group, including the local member 118 | 119 | 120 | 121 | ### opts/1 ### 122 | 123 |

124 | opts(Opts::list()) -> [swim_membership_opt()]
125 | 
126 |
127 | 128 | 129 | 130 | ### set_status/3 ### 131 | 132 |

133 | set_status(Pid::pid(), Member::member(), Status::member_status()) -> ok
134 | 
135 |
136 | 137 | Forcibly set the status of a member 138 | 139 | In certain circumstances we want to be able to set the status of a member 140 | without regard to the rules of the suspicion mechanism which uses a member's 141 | current status and incarnation. 142 | 143 | 144 | 145 | ### start_link/3 ### 146 | 147 |

148 | start_link(LocalMember::member(), EventMgrPid::pid(), Opts::[swim_membership_opt()]) -> {ok, pid()}
149 | 
150 |
151 | 152 | 153 | 154 | ### suspect/3 ### 155 | 156 |

157 | suspect(Pid::pid(), Member::member(), Incarnation::incarnation()) -> [{member_status(), member(), incarnation()}]
158 | 
159 |
160 | 161 | Set the member status to suspect 162 | 163 | If the member isn't already known we do nothing. If the member is known 164 | we update the status and broadcast the change under the following conditions. If 165 | the current status of the member is alive and the incarnation is greater than 166 | or equal to the known incarnation of the member, we update the member's status 167 | to suspect and broadcast the change. If the current status of the member is 168 | suspect and the incarnation is greater than the known incarnation, we update 169 | the member's status to suspect, set the known incarnation to the provided 170 | incarnation and broadcast the change. 171 | If the suspected member is the local member we refute by incrementing our own 172 | incarnation and broadcasting the change to the group. 173 | 174 | -------------------------------------------------------------------------------- /doc/swim_messages.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Module swim_messages # 4 | * [Description](#description) 5 | * [Function Index](#index) 6 | * [Function Details](#functions) 7 | 8 | This module is responsible for encoding and decoding SWIM protocol 9 | messages as well as encrypting and decrypting the message payloads. 10 | 11 | Copyright (c) 2015 12 | 13 | __Version:__ Feb 18 2016 10:13:03 14 | 15 | 16 | 17 | ## Description ## 18 | 19 | SWIM protocol message encodings can be found in the documentation 20 | corresponding to the various encoding functions defined in this module. 21 | [`encode_ack/3`](#encode_ack-3), [`encode_ping/2`](#encode_ping-2), [`encode_ping_req/2`](#encode_ping_req-2), 22 | [`encode_leave/1`](#encode_leave-1). 23 | All SWIM protocol messages are prefixed with a single octet reflecting 24 | the protocol version of the message. The overall format of SWIM messages is: 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |
11N
VersionTagData
39 | 40 | 41 | - __*Version*__ : is the protocol version the message is encoded for 42 | - __*Tag*__ : indicates what type of SWIM message Data represents; ACK, PING, 43 | PING-REQ, or LEAVE 44 | - __*Data*__ : The SWIM messages payload 45 | 46 | All SWIM messages are encrypted over the wire using AES128-GCM. See 47 | [`encrypt/3`](#encrypt-3) for more information. The encryption header is encoded as 48 | follows: 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
1616N
IVCipherTagCipherText
63 | 64 | 65 | 66 | ## Function Index ## 67 | 68 | 69 |
decode/1Decodes the provided message from a binary to an Erlang Term.
decode_event/2
decode_events/1
decrypt/3Verifies the authenticity of the payload and decrypts the ciphertext 70 | generated by encrypt/3.
encode_ack/3Encodes an ACK message, piggybacking membership and user events.
encode_event/1Encode either a membership event or a user event.
encode_events/1Encodes a list of swim events.
encode_leave/1Encodes a LEAVE message.
encode_member/1Encodes a Member as the IP address and port number combination.
encode_ping/2Encodes a PING message, piggybacking membership and user events.
encode_ping_req/2Encodes a PING-REQ message.
encrypt/3Encrypts the provided plain text using the Advanced Encryption Standard 71 | (AES) in Galois/Counter (GCM) using the provided 32-octet Key, 72 | Associated Authenticated Data (AAD), and a randomly generated 73 | Initialization Vector (IV).
event_size_limit/0Event size limit determines the maximum size (in octets) available to 74 | piggyback membership and user events on an ACK or PING message.
75 | 76 | 77 | 78 | 79 | ## Function Details ## 80 | 81 | 82 | 83 | ### decode/1 ### 84 | 85 |

 86 | decode(Message::binary()) -> swim_message()
 87 | 
88 |
89 | 90 | Decodes the provided message from a binary to an Erlang Term. 91 | 92 | All messages are prefixed with a single octet to indicate the version of 93 | of the protocol. The return value is an Erlang term of the message. If the 94 | version is not supported or the message is malformed, an exception is thrown. 95 | 96 | 97 | 98 | ### decode_event/2 ### 99 | 100 |

101 | decode_event(X1::membership | user, X2::binary()) -> {swim_event(), binary()}
102 | 
103 |
104 | 105 | 106 | 107 | ### decode_events/1 ### 108 | 109 |

110 | decode_events(Events::binary()) -> [swim_event()]
111 | 
112 |
113 | 114 | 115 | 116 | ### decrypt/3 ### 117 | 118 |

119 | decrypt(Key::<<_:256>>, AAD::binary(), Payload::binary()) -> binary() | {error, failed_verification}
120 | 
121 |
122 | 123 | Verifies the authenticity of the payload and decrypts the ciphertext 124 | generated by [`encrypt/3`](#encrypt-3). Note the keys used as input to [`encrypt/3`](#encrypt-3) 125 | must be identical to those provided here. Decrypt is not responsible for 126 | decoding the underlying Swim protocol message -- see [`decode/1`](#decode-1). 127 | 128 | 129 | 130 | ### encode_ack/3 ### 131 | 132 |

133 | encode_ack(Seq::sequence(), Target::member(), Events::[swim_event()] | binary()) -> binary() | no_return()
134 | 
135 |
136 | 137 | Encodes an ACK message, piggybacking membership and user events. 138 | 139 | An ACK message has the following format: 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 |
146N
2SequenceMemberEvents
156 | 157 | 158 | 159 | 160 | 161 |
Sequence
162 | 163 | 164 | 165 | 166 |
is the same Sequence received in the corresponding PING message
167 | 168 | 169 | 170 | 171 |
Member
172 | 173 | 174 | 175 | 176 |
is the terminal Member for the corresponding PING. In the case of 177 | a PING-REQ, the Member is not the sender of this ACK. 178 | See encode_member/1 for Member encoding.
179 | 180 | 181 | 182 | 183 |
Events
184 | 185 | 186 | 187 | 188 |
is a list of membership and user events piggybacked as a part of the 189 | dissemination protocol. 190 | See encode_events/1 for Event encoding.
191 | 192 | 193 | 194 | 195 | 196 | ### encode_event/1 ### 197 | 198 |

199 | encode_event(Event::swim_event() | binary()) -> binary()
200 | 
201 |
202 | 203 | Encode either a membership event or a user event. 204 | 205 | A membership event is encoded as follows: 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 |
1164
50StatusMemberIncarnation
222 | 223 | 224 | 225 | 226 | 227 |
Status
228 | 229 | 230 | 231 | 232 |
is observed status of the Member being broadcast to the group
233 | 234 | 235 | 236 | 237 |
Member
238 | 239 | 240 | 241 | 242 |
is the subject of this membership event
243 | 244 | 245 | 246 | 247 |
Incarnation
248 | 249 | 250 | 251 | 252 |
is the incarnation of the subject Member known by the sender of this 253 | event. See swim_membership for more information on Incarnations.
254 | 255 | 256 | 257 | A user event is encoded as follows: 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 |
12Size
51SizeErlang Term
272 | 273 | 274 | 275 | 276 | ### encode_events/1 ### 277 | 278 |

279 | encode_events(Events::[swim_event() | binary()]) -> binary()
280 | 
281 |
282 | 283 | Encodes a list of swim events. See [`encode_event/1`](#encode_event-1). 284 | 285 | 286 | 287 | ### encode_leave/1 ### 288 | 289 |

290 | encode_leave(Seq::sequence()) -> binary()
291 | 
292 |
293 | 294 | Encodes a LEAVE message. 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 |
14
4Sequence
307 | 308 | 309 | 310 | 311 | 312 |
Sequence
313 | 314 | 315 | 316 | 317 |
is the iteration of the failure detection protocol the leave message 318 | was sent during
319 | 320 | 321 | 322 | 323 | 324 | ### encode_member/1 ### 325 | 326 |

327 | encode_member(Member::{inet:ip_address(), inet:port_number()}) -> <<_:48>> | <<_:96>>
328 | 
329 |
330 | 331 | Encodes a Member as the IP address and port number combination. 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 |
1Size2
SizeIP AddressPort Number
346 | 347 | 348 | 349 | 350 | 351 |
IP Address
352 | 353 | 354 | 355 | 356 |
is the IPv4 or IPv6 address the Member can be reached
357 | 358 | 359 | 360 | 361 |
Port Number
362 | 363 | 364 | 365 | 366 |
is the associated Port Number the Member is listening on
367 | 368 | 369 | 370 | 371 | 372 | ### encode_ping/2 ### 373 | 374 |

375 | encode_ping(Seq::sequence(), Events::[swim_event()] | binary()) -> binary() | no_return()
376 | 
377 |
378 | 379 | Encodes a PING message, piggybacking membership and user events. 380 | 381 | A PING message has the following format: 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 |
14N
1SequenceEvents
396 | 397 | 398 | 399 | 400 | ### encode_ping_req/2 ### 401 | 402 |

403 | encode_ping_req(Seq::sequence(), Target::member()) -> binary()
404 | 
405 |
406 | 407 | Encodes a PING-REQ message. 408 | 409 | A PING-REQ message has the following format: 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 |
146
3SequenceMember
424 | 425 | 426 | 427 | 428 | 429 |
Sequence
430 | 431 | 432 | 433 | 434 |
is the iteration of the failure detection protocol the PING-REQ was 435 | sent during
436 | 437 | 438 | 439 | 440 |
Member
441 | 442 | 443 | 444 | 445 |
the terminal of the PING-REQ. The receiver of the PING-REQ is the proxy 446 | for the PING. See encode_member/1 for Member encoding.
447 | 448 | 449 | 450 | 451 | 452 | ### encrypt/3 ### 453 | 454 |

455 | encrypt(Key::<<_:256>>, AAD::binary(), PlainText::binary()) -> binary()
456 | 
457 |
458 | 459 | Encrypts the provided plain text using the Advanced Encryption Standard 460 | (AES) in Galois/Counter (GCM) using the provided 32-octet Key, 461 | Associated Authenticated Data (AAD), and a randomly generated 462 | Initialization Vector (IV). The resulting payload includes the 16-octet IV, 463 | the 16-octet CipherTag and the block encrypted cipher text. 464 | 465 | 466 | 467 | ### event_size_limit/0 ### 468 | 469 |

470 | event_size_limit() -> '?MAX_EVENT_SIZE'
471 | 
472 |
473 | 474 | Event size limit determines the maximum size (in octets) available to 475 | piggyback membership and user events on an ACK or PING message. 476 | 477 | The max message size we use is the minimum reassembly buffer size defined for 478 | IPv4 to avoid IP fragmentation -- 576 octets. 479 | UDP has an overhead of a 20 octet IP header and an 8 octet 480 | UDP header. The max swim message size is 40 octets, with 16 octets for the 481 | nonce, and 16 octets for the CipherTag. That leaves 476 octets for the events. 482 | A membership event is 41 octets which equates to 11 membership events per 483 | ACK/PING message. User messages have an overhead of 9 octets, leaving 467 484 | octets for the user message payload. 485 | 486 | -------------------------------------------------------------------------------- /doc/swim_subscriptions.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Module swim_subscriptions # 4 | * [Function Index](#index) 5 | * [Function Details](#functions) 6 | 7 | 8 | 9 | ## Function Index ## 10 | 11 | 12 |
code_change/3
handle_call/2
handle_event/2
handle_info/2
init/1
subscribe/3
terminate/2
13 | 14 | 15 | 16 | 17 | ## Function Details ## 18 | 19 | 20 | 21 | ### code_change/3 ### 22 | 23 | `code_change(OldVsn, State, Extra) -> any()` 24 | 25 | 26 | 27 | ### handle_call/2 ### 28 | 29 | `handle_call(Msg, State) -> any()` 30 | 31 | 32 | 33 | ### handle_event/2 ### 34 | 35 | `handle_event(Event, State) -> any()` 36 | 37 | 38 | 39 | ### handle_info/2 ### 40 | 41 | `handle_info(Info, State) -> any()` 42 | 43 | 44 | 45 | ### init/1 ### 46 | 47 | `init(X1) -> any()` 48 | 49 | 50 | 51 | ### subscribe/3 ### 52 | 53 | `subscribe(EventMgrPid, EventCategory, Pid) -> any()` 54 | 55 | 56 | 57 | ### terminate/2 ### 58 | 59 | `terminate(Reason, State) -> any()` 60 | 61 | -------------------------------------------------------------------------------- /doc/swim_transport.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Module swim_transport # 4 | * [Function Index](#index) 5 | * [Function Details](#functions) 6 | 7 | 8 | 9 | ## Function Index ## 10 | 11 | 12 |
close/1
code_change/3
handle_call/3
handle_cast/2
handle_info/2
init/1
rotate_keys/2
send/4
start_link/3
terminate/2
13 | 14 | 15 | 16 | 17 | ## Function Details ## 18 | 19 | 20 | 21 | ### close/1 ### 22 | 23 | `close(Pid) -> any()` 24 | 25 | 26 | 27 | ### code_change/3 ### 28 | 29 | `code_change(OldVsn, State, Extra) -> any()` 30 | 31 | 32 | 33 | ### handle_call/3 ### 34 | 35 | `handle_call(Msg, From, State) -> any()` 36 | 37 | 38 | 39 | ### handle_cast/2 ### 40 | 41 | `handle_cast(Msg, State) -> any()` 42 | 43 | 44 | 45 | ### handle_info/2 ### 46 | 47 | `handle_info(Info, State) -> any()` 48 | 49 | 50 | 51 | ### init/1 ### 52 | 53 | `init(X1) -> any()` 54 | 55 | 56 | 57 | ### rotate_keys/2 ### 58 | 59 | `rotate_keys(Pid, Key) -> any()` 60 | 61 | 62 | 63 | ### send/4 ### 64 | 65 | `send(Pid, DestIp, DestPort, Data) -> any()` 66 | 67 | 68 | 69 | ### start_link/3 ### 70 | 71 | `start_link(ListenIp, ListenPort, Keys) -> any()` 72 | 73 | 74 | 75 | ### terminate/2 ### 76 | 77 | `terminate(Reason, State) -> any()` 78 | 79 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {erl_opts, [ 2 | debug_info, 3 | warn_unused_vars, 4 | warn_shadow_vars, 5 | warn_unused_export, 6 | warn_unused_function, 7 | warn_unused_record, 8 | warn_deprecated_function, 9 | warn_deprecated_type, 10 | warn_obsolete_guard, 11 | strict_validation, 12 | warn_export_vars, 13 | warn_exported_vars, 14 | warn_untyped_record, 15 | fail_on_warning 16 | ]}. 17 | {deps, []}. 18 | {cover_enabled, true}. 19 | {cover_opts, [verbose]}. 20 | {edoc_opts, [ 21 | {doclet, edown_doclet}, 22 | {app_default, "http://www.erlang.org/doc/man"}, 23 | {doc_path, []}, 24 | {top_level_readme, {"./README.md", "https://github.com/ctbarbour/swim", "master"}} 25 | ]}. 26 | {xref_warnings, true}. 27 | {xref_checks, [ 28 | undefined_function_calls, 29 | undefined_functions, 30 | locals_not_used, 31 | deprecated_function_calls, 32 | deprecated_functions 33 | ]}. 
34 | {dialyzer, [ 35 | {warnings, [ 36 | race_conditions, 37 | error_handling 38 | ]} 39 | ]}. 40 | {plugins, [rebar3_proper]}. 41 | {profiles, [ 42 | {docs, [{deps, [edown]}]}, 43 | {shell, [{deps, [recon, sync]}]}, 44 | {test, [{deps, [proper, meck]}]} 45 | ]}. 46 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | []. 2 | -------------------------------------------------------------------------------- /src/swim.app.src: -------------------------------------------------------------------------------- 1 | {application, swim, 2 | [{description, "Scalable Weakly Consistent Infection-style Process Group Membership Protocol"}, 3 | {vsn, semver}, 4 | {registered, []}, 5 | {mod, {swim_app, []}}, 6 | {applications, 7 | [kernel, 8 | stdlib, 9 | crypto 10 | ]}, 11 | {env,[]}, 12 | {modules, []}, 13 | {licenses, ["Apache 2.0"]} 14 | ]}. 15 | -------------------------------------------------------------------------------- /src/swim.erl: -------------------------------------------------------------------------------- 1 | %%% ---------------------------------------------------------------------------- 2 | %%% Copyright (c) 2015-2017. All Rights Reserved. 3 | %%% 4 | %%% Licensed under the Apache License, 5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance 6 | %%% with the License. 7 | %%% You may obtain a copy of the License at 8 | %%% 9 | %%% http://www.apache.org/licenses/LICENSE-2.0 10 | %%% 11 | %%% Unless required by applicable law or agreed to in writing, software 12 | %%% distributed under the License is distributed on an "AS IS" BASIS, 13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | %%% See the License for the specific language governing permissions and 15 | %%% limitiations under the License. 
%%% ----------------------------------------------------------------------------

%%% @doc Public entry points of the `swim' application.
%%%
%%% Every function in this module is a thin wrapper that forwards to
%%% `swim_pushpull', `swim_state', `swim_metrics' or `swim_subscriptions'.

-module(swim).

%% Group membership
-export([join/1,
         members/0,
         myself/0]).
%% Event publication and subscription
-export([publish/1,
         subscribe/1,
         unsubscribe/1]).
%% Multi-node bootstrap helper
-export([setup/0]).

-export_type([swim_event/0,
              member/0,
              incarnation/0,
              user_event/0,
              membership_event/0]).

-type member() :: {inet:ip_address(), inet:port_number()}.
-type incarnation() :: non_neg_integer().
-type user_event() :: binary().
-type alive_event() :: {alive, incarnation(), member()}.
-type suspect_event() :: {suspect, incarnation(), member(), member()}.
-type faulty_event() :: {faulty, incarnation(), member(), member()}.
-type membership_event() :: alive_event() | suspect_event() | faulty_event().
-type swim_event() :: {user, user_event()} | {membership, membership_event()}.

%% @doc Hands `Seed' to `swim_pushpull:join/2' together with an empty
%% options map and returns whatever that call returns.
join(Seed) ->
    NoOpts = #{},
    swim_pushpull:join(Seed, NoOpts).

%% @doc Returns the first element of every 3-tuple reported by
%% `swim_state:members/0'. Entries of any other shape are silently skipped.
members() ->
    Entries = swim_state:members(),
    [Member || {Member, _, _} <- Entries].

%% @doc Returns the result of `swim_state:local_member/0'.
myself() ->
    swim_state:local_member().

%% @doc Forwards the binary `Msg' to `swim_state:publish/1'.
%% Any non-binary argument fails with `function_clause'.
publish(<<_/binary>> = Msg) ->
    swim_state:publish(Msg).

%% @doc Subscribes the calling process. The atom `metrics' is routed to
%% `swim_metrics'; every other category goes to `swim_subscriptions'.
subscribe(metrics) ->
    Subscriber = self(),
    swim_metrics:subscribe(Subscriber);
subscribe(EventCategory) ->
    Subscriber = self(),
    swim_subscriptions:subscribe(EventCategory, Subscriber).

%% @doc Reverses `subscribe/1' for the calling process, using the same
%% routing: `metrics' to `swim_metrics', anything else to
%% `swim_subscriptions'.
unsubscribe(metrics) ->
    Subscriber = self(),
    swim_metrics:unsubscribe(Subscriber);
unsubscribe(EventCategory) ->
    Subscriber = self(),
    swim_subscriptions:unsubscribe(EventCategory, Subscriber).
%% Configures and starts the `swim' application on this node and on every
%% node currently connected to it.
%%
%% Each node gets its own `port' value (consecutive numbers starting at 5000)
%% and all nodes share one freshly generated, base64-encoded 32-byte `key'.
%% Returns the list of results of the remote `application:start(swim)' calls,
%% local node first.
setup() ->
    Key = base64:encode(crypto:strong_rand_bytes(32)),
    BasePort = 5000,
    %% Snapshot the node list once: calling nodes() repeatedly could observe
    %% different cluster memberships, which makes lists:zip/2 crash or leaves
    %% a node without a port.
    Nodes = [node() | nodes()],
    Ports = lists:seq(BasePort, BasePort + length(Nodes) - 1),
    lists:foreach(
      fun({Node, Port}) ->
              rpc:call(Node, application, set_env, [swim, port, Port])
      end,
      lists:zip(Nodes, Ports)),
    lists:foreach(
      fun(Node) ->
              rpc:call(Node, application, set_env, [swim, key, Key])
      end,
      Nodes),
    [rpc:call(Node, application, start, [swim]) || Node <- Nodes].

--------------------------------------------------------------------------------
/src/swim_app.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_app).
-behavior(application).

-export([start/2]).
-export([stop/1]).

%% Application callback: starts the top-level supervisor.
start(_Type, _Args) ->
    swim_sup:start_link().

%% Application callback: nothing to clean up.
stop(_State) ->
    ok.

--------------------------------------------------------------------------------
/src/swim_awareness.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_awareness).

-export([
    new/1,
    success/1,
    failure/1,
    failure/2,
    scale/2
]).

%% A `{Max, Score}' pair: Score starts at 0, is lowered by success/1 and
%% raised by failure/1, and is kept within 0..Max by those functions.
-opaque awareness() :: {pos_integer(), non_neg_integer()}.
-export_type([awareness/0]).

%% Creates an awareness value with upper bound Max and a score of 0.
-spec new(Max) -> Awareness when
      Max :: pos_integer(),
      Awareness :: awareness().

new(Max) ->
    {Max, 0}.

%% Lowers the score by one; a score of 0 is left untouched.
-spec success(Awareness0) -> Awareness when
      Awareness0 :: awareness(),
      Awareness :: awareness().

success({_Max, 0} = AtFloor) ->
    AtFloor;
success({Max, Score}) ->
    {Max, Score - 1}.

%% Raises the score by one; a score already equal to Max is left untouched.
-spec failure(Awareness0) -> Awareness when
      Awareness0 :: awareness(),
      Awareness :: awareness().

failure({Max, Score}) when Score =:= Max ->
    {Max, Score};
failure({Max, Score}) ->
    {Max, Score + 1}.

%% Applies failure/1 N times in a row.
-spec failure(N, Awareness0) -> Awareness when
      N :: pos_integer(),
      Awareness0 :: awareness(),
      Awareness :: awareness().

failure(N, Awareness) ->
    Steps = lists:seq(1, N),
    lists:foldl(fun(_Step, Current) -> failure(Current) end, Awareness, Steps).

%% Multiplies Timeout by the current score plus one, so a score of 0 leaves
%% the timeout unchanged.
-spec scale(Timeout, Awareness) -> Value when
      Timeout :: non_neg_integer(),
      Awareness :: awareness(),
      Value :: non_neg_integer().

scale(Timeout, {_Max, Score}) ->
    Multiplier = Score + 1,
    Timeout * Multiplier.

--------------------------------------------------------------------------------
/src/swim_broadcasts.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

%%% @doc This module is responsible for maintaining membership updates and user
%%% provided events along with their state as a part of the
%%% infection-style dissemination component of the SWIM protocol.
%%%
%%% ### Infection-Style Dissemination
%%% As an alternative to IP Multicast
%%% or a point-to-point messaging scheme, the SWIM protocol
%%% disseminates membership updates by piggybacking on messages sent
%%% as a part of the failure detection protocol. Thus, the implementation
%%% does not generate any extra packets to send membership updates.
%%%
%%% Here, `swim_broadcasts' maintains a buffer of recent membership
%%% events along with a count for each event. The local count
%%% specifies the number of times the event has been piggybacked so
%%% far by this member and is used to choose which events to piggyback
%%% next. In the SWIM paper each event is piggybacked at most
%%% `Retransmit * log(N + 1)' times, where `Retransmit' is a configurable
%%% parameter; the limit actually computed by this module is given by
%%% `retransmit_limit/2'.
%%% If the encoded events in the buffer do not all fit within the size limit
%%% of a single PING or ACK, events that have
%%% been gossiped fewer times are preferred. This is needed as the
%%% protocol period is fixed and the rate of membership changes might
%%% temporarily overwhelm the speed of dissemination. Preferring
%%% "younger" events under such circumstances ensures that all
%%% membership changes infect at least a few members - when the
%%% membership change rate quiesces, older events will
%%% propagate through the rest of the gossip group. Membership events are always
%%% preferred over user-provided events.
%%%
%%% @todo Consider a more efficient implementation. We're sorting lists a lot.
%%% We could potentially use a min-heap or priority queue, but we need to handle
%%% the requirement for invalidating events about the same member. Consider
%%% https://github.com/okeuday/pqueue/blob/master/src/pqueue4.erl
%%% @end
-module(swim_broadcasts).

-export([new/1]).
-export([new/2]).
-export([insert/2]).
-export([prune/2]).
-export([take/1]).
-export([take/2]).
-export([retransmit_limit/2]).

%% Broadcast queue state.
%%   members     - queued membership events as {TransmitCount, Event}
%%   users       - queued user events as {TransmitCount, Event}
%%   retransmits - configured retransmit factor, see retransmit_limit/2
%%   limit_fun   - returns the size an event contributes to a message; it is
%%                 called with tagged events, `{membership, E}' or `{user, E}'
%%   limit       - total size budget for the events taken in one call
-record(broadcast, {
    members = [] :: [{non_neg_integer(), swim:membership_event()}],
    users = [] :: [{non_neg_integer(), swim:user_event()}],
    retransmits :: pos_integer(),
    limit_fun :: fun((swim:swim_event()) -> pos_integer()),
    limit :: pos_integer()
}).

-opaque broadcast() :: #broadcast{}.
-export_type([broadcast/0]).

%% Creates an empty queue using the event size limit reported by
%% swim_messages:event_size_limit/0.
-spec new(Retransmits) -> Broadcast when
      Retransmits :: pos_integer(),
      Broadcast :: broadcast().

new(Retransmits) ->
    new(Retransmits, default_limit()).

%% Creates an empty queue with an explicit size budget. Event sizes are
%% measured as the byte size of their encoded form.
-spec new(Retransmits, Limit) -> Broadcast when
      Retransmits :: pos_integer(),
      Limit :: pos_integer(),
      Broadcast :: broadcast().

new(Retransmits, Limit) ->
    LimitFun = default_limit_fun(),
    #broadcast{retransmits = Retransmits, limit = Limit, limit_fun = LimitFun}.

default_limit() ->
    swim_messages:event_size_limit().

default_limit_fun() ->
    fun(E) -> iolist_size(swim_messages:encode_event(E)) end.

%% Computes the retransmit limit for a group of NumMembers members as
%% `round(ln(NumMembers + 1)) + Retransmits'.
%% NOTE(review): the module documentation describes the limit as
%% `Retransmit * log(N + 1)' (a product); this function adds the factor
%% instead. Behaviour is left unchanged here - confirm which is intended.
-spec retransmit_limit(NumMembers, Broadcast) -> Limit when
      NumMembers :: pos_integer(),
      Broadcast :: broadcast(),
      Limit :: pos_integer().

retransmit_limit(NumMembers, #broadcast{retransmits = Factor}) ->
    round(math:log(NumMembers + 1)) + Factor.

%% Takes as many queued events as fit in the size budget, membership events
%% before user events. Every taken event stays queued with its transmit
%% count incremented; events that were not taken keep their count.
-spec take(Broadcasts0) -> {Events, Broadcasts} when
      Broadcasts0 :: broadcast(),
      Events :: [swim:membership_event() | swim:user_event()],
      Broadcasts :: broadcast().

take(#broadcast{limit = Limit, limit_fun = Fun, members = Members, users = Users} = Broadcast) ->
    {B, M, U} = take(Limit, Fun, Members, Users, {[], [], []}),
    {B, Broadcast#broadcast{members = M, users = U}}.

%% take(Budget, SizeFun, Members, Users, {Taken, MembersAcc, UsersAcc})
%%
%% Walks the membership queue and then the user queue, taking events while
%% they fit in the remaining Budget. Taken holds the tagged events to send
%% (in reverse order of selection); MembersAcc/UsersAcc hold the queue
%% entries already visited, with the transmit count incremented for taken
%% events. The walk stops at the first event that does not fit. The returned
%% queues are sorted, so entries with the lowest transmit count come first.
take(0, _Fun, Members, Users, {B, M, U}) ->
    %% Budget exhausted: whatever was not visited is kept as is.
    {B, lists:sort(Members ++ M), lists:sort(Users ++ U)};
take(_Limit, _Fun, [], [], {B, M, U}) ->
    {B, lists:sort(M), lists:sort(U)};
take(Limit, Fun, [{T, E} | Members], Users, {B, M, U}) ->
    case Limit - Fun({membership, E}) of
        L when L >= 0 ->
            take(L, Fun, Members, Users, {[{membership, E} | B], [{T + 1, E} | M], U});
        _ ->
            %% The event does not fit: put it back with its count unchanged
            %% and keep the entries accumulated so far. (Accumulating onto
            %% Members here instead of M would drop every event already
            %% taken and duplicate the unvisited ones.)
            take(0, Fun, Members, Users, {B, [{T, E} | M], U})
    end;
take(Limit, Fun, [], [{T, E} | Users], {B, M, U}) ->
    case Limit - Fun({user, E}) of
        L when L >= 0 ->
            take(L, Fun, [], Users, {[{user, E} | B], M, [{T + 1, E} | U]});
        _ ->
            take(0, Fun, [], Users, {B, M, [{T, E} | U]})
    end.

%% Like take/1, but a queued suspect event about Target is always included,
%% ahead of everything else and regardless of the remaining budget
%% (presumably so a suspected member learns about the suspicion when it is
%% contacted - confirm against the caller).
-spec take(Member, Broadcasts0) -> {Events, Broadcasts} when
      Member :: swim:member(),
      Broadcasts0 :: broadcast(),
      Events :: [swim:membership_event() | swim:user_event()],
      Broadcasts :: broadcast().

take(Target, Broadcasts) ->
    #broadcast{limit = Limit, limit_fun = Fun, users = Users} = Broadcasts,
    Partition = fun({_, {suspect, _, M, _}}) -> M =:= Target; (_) -> false end,
    {Maybe, Members} = lists:partition(Partition, Broadcasts#broadcast.members),
    {B, M, U} =
        case Maybe of
            [] ->
                take(Limit, Fun, Members, Users, {[], [], []});
            [{T, About}] ->
                %% At most one event per member is queued, see insert/2.
                Acc = {[{membership, About}], [{T + 1, About}], []},
                take(Limit - Fun({membership, About}), Fun, Members, Users, Acc)
        end,
    {B, Broadcasts#broadcast{members = M, users = U}}.

-spec prune(Retransmits, Broadcasts0) -> Broadcasts when
      Retransmits :: non_neg_integer(),
      Broadcasts0 :: broadcast(),
      Broadcasts :: broadcast().

%% Drops every queued event whose transmit count has reached Retransmits.
prune(Retransmits, #broadcast{members = Members, users = Users} = Broadcast) ->
    StillDue = fun({Count, _Event}) -> Count < Retransmits end,
    Broadcast#broadcast{
        members = lists:filter(StillDue, Members),
        users = lists:filter(StillDue, Users)
    }.

%% @doc Insert an Event or list of Events into the Broadcast queue.
%%
%% Upon inserting a Membership Event we invalidate any existing event about the
%% same target member to prevent the broadcast of stale information. New events
%% enter the queue with a transmit count of 0.
%% @end
-spec insert(Events, Broadcasts0) -> Broadcasts when
      Events :: swim:swim_event() | [swim:swim_event()],
      Broadcasts0 :: broadcast(),
      Broadcasts :: broadcast().

insert(Events, Broadcast) when is_list(Events) ->
    lists:foldl(fun(Event, Acc) -> insert(Event, Acc) end, Broadcast, Events);
insert({membership, Event}, #broadcast{members = Queued} = Broadcast) ->
    Broadcast#broadcast{members = invalidate(Event, Queued)};
insert({user, Event}, #broadcast{users = Queued} = Broadcast) ->
    Fresh = {0, Event},
    Broadcast#broadcast{users = lists:sort([Fresh | Queued])}.

%% Membership events are 3- or 4-element tuples whose third element is the
%% member the event is about.
-spec invalidate(Event, Events0) -> Events when
      Event :: swim:membership_event(),
      Events0 :: [{non_neg_integer(), swim:membership_event()}],
      Events :: [{non_neg_integer(), swim:membership_event()}].

invalidate(Event, Events)
  when tuple_size(Event) =:= 3; tuple_size(Event) =:= 4 ->
    invalidate(element(3, Event), Event, Events).

-spec invalidate(Member, Event, Events0) -> Events when
      Member :: swim:member(),
      Event :: swim:membership_event(),
      Events0 :: [{non_neg_integer(), swim:membership_event()}],
      Events :: [{non_neg_integer(), swim:membership_event()}].

%% Removes every queued event about Member, then queues Event with a transmit
%% count of 0 and re-sorts the queue.
invalidate(Member, Event, Events) ->
    AboutOther =
        fun({_Count, Queued})
              when tuple_size(Queued) =:= 3; tuple_size(Queued) =:= 4 ->
                element(3, Queued) =/= Member
        end,
    Kept = lists:filter(AboutOther, Events),
    lists:sort([{0, Event} | Kept]).

--------------------------------------------------------------------------------
/src/swim_failure.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_failure).
-behavior(gen_server).

%% API
-export([
    start_link/4,
    stop/0,
    probe/3,
    probe/4
]).

%% gen_server callbacks
-export([
    init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2,
    code_change/3,
    terminate/2
]).

%% Server state of the failure detector.
-record(state, {
    %% Address of this node; pings addressed to any other target are ignored.
    local_member :: swim:member(),
    %% The probe currently in flight, if any. A new probe is only started
    %% while this is `undefined'.
    probe :: undefined | probe(),
    %% Pings sent on behalf of other members, keyed by the pinged member and
    %% the sequence number this node used for that ping.
    ping_reqs = #{} :: #{{swim:member(), sequence()} := ping_req()},
    %% Timer lengths used for pings sent on behalf of other members.
    nack_timeout :: non_neg_integer(),
    ack_timeout :: non_neg_integer(),
    %% Socket opened in init/1 and closed in terminate/2.
    socket :: undefined | inet:socket(),
    %% Keys used to encrypt outgoing and decrypt incoming packets.
    keyring :: swim_keyring:keyring(),
    %% Sequence number of the most recent ping originated by this node.
    sequence = 0 :: sequence()
}).

%% A probe started by this node.
-record(probe, {
    target :: swim:member(),
    sequence :: sequence(),
    ack_timer :: reference(),
    probe_timer :: reference(),
    %% Called with the responding member when the matching ack arrives.
    ack_fun :: fun((swim:member()) -> ok),
    %% Set to the number of proxies when the direct ack times out, lowered by
    %% one for each matching nack, and reported to swim_state when the probe
    %% times out.
    missing_nacks = 0 :: non_neg_integer()
}).

%% A ping sent to some member because `origin' asked for it with a ping_req.
-record(ping_req, {
    %% Member that sent the ping_req and its sequence number, used when
    %% relaying the ack or nack back.
    origin :: swim:member(),
    sequence :: sequence(),
    ack_timer :: reference(),
    nack_timer :: reference()
}).

-type ping_req() :: #ping_req{}.
-type probe() :: #probe{}.
-type sequence() :: non_neg_integer().

-export_type([sequence/0]).

%% Starts the failure detector as a locally registered gen_server named after
%% this module.
-spec start_link(Member, KeyRing, AckTimeout, NackTimeout) -> {ok, pid()} when
      Member :: swim:member(),
      KeyRing :: swim_keyring:keyring(),
      AckTimeout :: pos_integer(),
      NackTimeout :: pos_integer().

start_link(LocalMember, Keyring, AckTimeout, NackTimeout) ->
    Args = [LocalMember, Keyring, AckTimeout, NackTimeout],
    gen_server:start_link({local, ?MODULE}, ?MODULE, Args, []).

%% Stops the registered server.
-spec stop() -> ok.

stop() ->
    gen_server:stop(?MODULE).

%% Same as probe/4 with swim_state:ack/1 as the ack callback.
-spec probe(Target, AckTimeout, ProbeTimeout) -> ok when
      Target :: swim:member(),
      AckTimeout :: pos_integer(),
      ProbeTimeout :: pos_integer().

probe(Target, AckTimeout, ProbeTimeout) ->
    probe(Target, AckTimeout, ProbeTimeout, fun swim_state:ack/1).

-spec probe(Target, AckTimeout, ProbeTimeout, AckFun) -> ok when
      Target :: swim:member(),
      AckTimeout :: pos_integer(),
      ProbeTimeout :: pos_integer(),
      AckFun :: fun((swim:member()) -> ok).

%% Asynchronously asks the server to probe Target. The request is silently
%% dropped by the server if another probe is still in flight. Crashes with
%% function_clause unless ProbeTimeout is at least three times AckTimeout.
probe(Target, AckTimeout, ProbeTimeout, AckFun)
  when ProbeTimeout >= AckTimeout * 3 ->
    Msg = {probe, Target, AckTimeout, ProbeTimeout, AckFun},
    gen_server:cast(?MODULE, Msg).

%% @private
%% Opens the socket on the local member's port in `{active, 16}' mode; the
%% socket is re-armed in handle_info/2 when it turns passive.
init([{_, Port} = LocalMember, Keyring, AckTimeout, NackTimeout]) ->
    SocketOpts = [binary, {active, 16}],
    {ok, Socket} = swim_socket:open(Port, SocketOpts),
    State = #state{
        local_member = LocalMember,
        keyring = Keyring,
        socket = Socket,
        ack_timeout = AckTimeout,
        nack_timeout = NackTimeout
    },
    {ok, State}.

%% @private
%% This server defines no synchronous requests. Reply with an error instead
%% of `noreply': without a reply the caller would block until its call
%% timeout expires and then exit.
handle_call(_Msg, _From, State) ->
    {reply, {error, unknown_call}, State}.

%% @private
%% Starts a probe only when none is in flight; every other cast is ignored.
handle_cast({probe, Target, AckTimeout, ProbeTimeout, AckFun}, State)
  when State#state.probe =:= undefined ->
    {noreply, send_probe(Target, AckTimeout, ProbeTimeout, AckFun, State)};
handle_cast(_Msg, State) ->
    {noreply, State}.

%% @private
handle_info({udp_passive, Socket}, #state{socket = Socket} = State) ->
    %% The active-message budget is used up: re-arm the socket.
    ok = swim_socket:setopts(Socket, [{active, 16}]),
    {noreply, State};
handle_info({udp, Socket, Ip, InPortNo, Packet}, #state{socket = Socket} = State) ->
    {noreply, handle_packet(Packet, {Ip, InPortNo}, State)};
handle_info({probe_timeout, Target, Sequence}, State) ->
    {noreply, handle_probe_timeout(Target, Sequence, State)};
handle_info({ack_timeout, Target, Sequence}, State) ->
    {noreply, handle_ack_timeout(Target, Sequence, State)};
handle_info({nack_timeout, Target, Sequence}, State) ->
    {noreply, handle_nack_timeout(Target, Sequence, State)};
handle_info(_Info, State) ->
    {noreply, State}.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% @private
%% Closes the socket if one was opened.
terminate(_Reason, #state{socket = undefined}) ->
    ok;
terminate(_Reason, #state{socket = Socket}) ->
    swim_socket:close(Socket).

%% Sends a ping with the next sequence number to Target, starts the ack and
%% probe timers for it and records the probe in the returned state.
send_probe(Target, AckTimeout, ProbeTimeout, AckFun, State) ->
    NextSequence = State#state.sequence + 1,
    Msg = {ping, NextSequence, Target},
    NewState = send(Target, Msg, State),
    AckTimer = start_ack_timer(AckTimeout, Target, NextSequence),
    ProbeTimer = start_probe_timer(ProbeTimeout, Target, NextSequence),
    Probe = #probe{
        target = Target,
        sequence = NextSequence,
        ack_timer = AckTimer,
        probe_timer = ProbeTimer,
        ack_fun = AckFun
    },
    swim_metrics:notify({probe, Target}),
    NewState#state{probe = Probe, sequence = NextSequence}.

%% Decrypts and decodes an incoming packet, hands the piggybacked events to a
%% linked helper process and dispatches the protocol message. Packets that
%% fail verification are dropped and the state is returned unchanged.
handle_packet(Packet, Peer, State) ->
    case decrypt(Packet, State) of
        {ok, PlainText} ->
            %% NOTE(review): the try covers handle_message/3 as well as the
            %% decode, so any exception raised while handling the message
            %% (including a failed `ok = swim_socket:send(...)' match in
            %% send/3) is swallowed and the packet is treated as dropped.
            try
                {Message, Events} = swim_messages:decode(PlainText),
                swim_metrics:notify({rx, iolist_size(Packet)}),
                spawn_link(fun() -> handle_events(Events) end),
                handle_message(Message, Peer, State)
            catch
                _:_ ->
                    State
            end;
        {error, failed_verification} ->
            State
    end.

%% Reports the message to swim_metrics and dispatches on its type. Peer is
%% the address the packet was received from.
handle_message({ack, Sequence, Terminal}, _Peer, State) ->
    swim_metrics:notify({ack, Terminal}),
    handle_ack(Sequence, Terminal, State);
handle_message({nack, Sequence, Terminal}, Peer, State) ->
    swim_metrics:notify({nack, Terminal, Peer}),
    handle_nack(Sequence, Terminal, State);
handle_message({ping, Sequence, Target}, Peer, State) ->
    swim_metrics:notify({ping, Peer}),
    handle_ping(Target, Sequence, Peer, State);
handle_message({ping_req, Sequence, Terminal}, Peer, State) ->
    swim_metrics:notify({ping_req, Terminal, Peer}),
    handle_ping_req(Sequence, Terminal, Peer, State).

%% An ack that matches the probe in flight completes it: the ack callback is
%% run, both probe timers are cancelled and the probe is cleared. Any other
%% ack is looked up among the pings sent on behalf of other members and, if
%% found, relayed to the member that asked for it.
handle_ack(Sequence, Responder,
           #state{probe = #probe{target = Responder, sequence = Sequence} = Probe} = State) ->
    Notify = Probe#probe.ack_fun,
    Notify(Responder),
    TimerOpts = [{async, true}, {info, false}],
    swim_time:cancel_timer(Probe#probe.ack_timer, TimerOpts),
    swim_time:cancel_timer(Probe#probe.probe_timer, TimerOpts),
    State#state{probe = undefined};
handle_ack(Sequence, Responder, State) ->
    Key = {Responder, Sequence},
    case maps:take(Key, State#state.ping_reqs) of
        error ->
            State;
        {PingReq, Remaining} ->
            %% Relay the ack using the sequence number the origin chose.
            Relayed = {ack, PingReq#ping_req.sequence, Responder},
            Sent = send(PingReq#ping_req.origin, Relayed, State),
            TimerOpts = [{async, true}, {info, false}],
            swim_time:cancel_timer(PingReq#ping_req.ack_timer, TimerOpts),
            swim_time:cancel_timer(PingReq#ping_req.nack_timer, TimerOpts),
            Sent#state{ping_reqs = Remaining}
    end.

%% A nack that matches the probe in flight lowers its count of missing
%% nacks by one; any other nack is ignored.
handle_nack(Sequence, Target,
            #state{probe = #probe{target = Target, sequence = Sequence} = Probe} = State) ->
    Outstanding = Probe#probe.missing_nacks - 1,
    State#state{probe = Probe#probe{missing_nacks = Outstanding}};
handle_nack(_Sequence, _Target, State) ->
    State.

%% Acknowledges pings addressed to the local member and ignores all others.
handle_ping(Target, Sequence, Peer, State)
  when Target =:= State#state.local_member ->
    send(Peer, {ack, Sequence, Target}, State);
handle_ping(_Target, _Sequence, _Peer, State) ->
    State.

%% Pings Terminal on behalf of Origin. The ping uses this node's own next
%% sequence number; the origin's sequence number is remembered so the ack or
%% nack can be relayed back with it.
handle_ping_req(OriginSequence, Terminal, Origin, State) ->
    NextSequence = State#state.sequence + 1,
    Msg = {ping, NextSequence, Terminal},
    NewState = send(Terminal, Msg, State),
    NackTimer = start_nack_timer(State#state.nack_timeout, Terminal, NextSequence),
    AckTimer = start_ack_timer(State#state.ack_timeout, Terminal, NextSequence),
    PingReq = #ping_req{origin = Origin, sequence = OriginSequence,
                        ack_timer = AckTimer, nack_timer = NackTimer},
    PingReqs = maps:put({Terminal, NextSequence}, PingReq, State#state.ping_reqs),
    NewState#state{ping_reqs = PingReqs, sequence = NextSequence}.

%% Ack timeout for the probe in flight: ask the proxies chosen by swim_state
%% to ping the target and expect one nack from each of them.
%% Ack timeout for a ping sent on behalf of another member: forget it.
handle_ack_timeout(Target, Sequence, #state{probe = Probe} = State)
  when Probe#probe.target =:= Target andalso Probe#probe.sequence =:= Sequence ->
    swim_metrics:notify({ack_timeout, Target}),
    swim_time:cancel_timer(Probe#probe.ack_timer, [{async, true}, {info, false}]),
    Msg = {ping_req, Sequence, Probe#probe.target},
    Proxies = swim_state:proxies(Target),
    NewState = lists:foldl(fun(Proxy, S) -> send(Proxy, Msg, S) end, State, Proxies),
    NewState#state{probe = Probe#probe{missing_nacks = length(Proxies)}};
handle_ack_timeout(Target, Sequence, State) ->
    case maps:take({Target, Sequence}, State#state.ping_reqs) of
        {PingReq, PingReqs} ->
            swim_metrics:notify({ack_timeout, Target}),
            %% The entry is gone, so a nack timer that is still pending has
            %% nothing left to report.
            swim_time:cancel_timer(PingReq#ping_req.nack_timer, [{async, true}, {info, false}]),
            State#state{ping_reqs = PingReqs};
        error ->
            State
    end.

%% Nack timeout for a ping sent on behalf of another member: tell the origin
%% that no ack has been received from Target yet. The ping_req entry is kept
%% so a later ack can still be relayed.
handle_nack_timeout(Target, Sequence, State) ->
    case maps:find({Target, Sequence}, State#state.ping_reqs) of
        {ok, #ping_req{origin = Origin, sequence = OriginSequence}} ->
            swim_metrics:notify({nack_timeout, Target, Origin}),
            %% The nack must name the probed member, exactly like the relayed
            %% ack does: the origin matches it against its probe target in
            %% handle_nack/3. Naming the origin itself would never match.
            Msg = {nack, OriginSequence, Target},
            send(Origin, Msg, State);
        error ->
            State
    end.

%% When the probe in flight times out, report it to swim_state together with
%% the number of nacks still missing and clear the probe. Timeouts that do
%% not belong to the probe in flight are ignored.
handle_probe_timeout(Target, Sequence,
                     #state{probe = #probe{target = Target, sequence = Sequence} = Probe} = State) ->
    swim_metrics:notify({probe_timeout, Target}),
    swim_state:probe_timeout(Target, Probe#probe.missing_nacks),
    State#state{probe = undefined};
handle_probe_timeout(_Target, _Sequence, State) ->
    State.

%% Hands all membership events to swim_state first, then publishes all user
%% events to subscribers. Events of any other shape are skipped.
handle_events(Events) ->
    Membership = [Event || {membership, _} = Event <- Events],
    User = [Event || {user, _} = Event <- Events],
    lists:foreach(fun(Event) -> swim_state:handle_event(Event) end, Membership),
    lists:foreach(fun(Event) -> swim_subscriptions:publish(Event) end, User),
    ok.

% Not sure if we need to handle the case when sending to the socket fails or if we can
% just let it crash.
%% Encodes Msg together with the events swim_state wants piggybacked for
%% Target, encrypts the result and sends it. Returns State unchanged.
send({DestIp, DestPort} = Target, Msg, State) ->
    Piggybacked = swim_state:broadcasts(Target),
    Encoded = swim_messages:encode({Msg, Piggybacked}),
    Payload = encrypt(Encoded, State),
    ok = swim_socket:send(State#state.socket, DestIp, DestPort, Payload),
    swim_metrics:notify({tx, iolist_size(Payload)}),
    State.

%% Each timer delivers its message to this process after Timeout.
start_ack_timer(Timeout, Terminal, Sequence) ->
    Expired = {ack_timeout, Terminal, Sequence},
    swim_time:send_after(Timeout, self(), Expired).

start_nack_timer(Timeout, Terminal, Sequence) ->
    Expired = {nack_timeout, Terminal, Sequence},
    swim_time:send_after(Timeout, self(), Expired).

start_probe_timer(Timeout, Target, Sequence) ->
    Expired = {probe_timeout, Target, Sequence},
    swim_time:send_after(Timeout, self(), Expired).

encrypt(Msg, State) ->
    Keyring = State#state.keyring,
    swim_keyring:encrypt(Msg, Keyring).

decrypt(CipherText, State) ->
    Keyring = State#state.keyring,
    swim_keyring:decrypt(CipherText, Keyring).

--------------------------------------------------------------------------------
/src/swim_keyring.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_keyring).

-export([
    new/1,
    new/2,
    add/2,
    encrypt/2,
    decrypt/2
]).

%% Default associated data: SHA-256 of the node's Erlang cookie.
-define(AAD, crypto:hash(sha256, term_to_binary(erlang:get_cookie()))).

%% The head of `keys' is the key used for encryption; every key is tried in
%% order when decrypting.
-record(keyring, {
    keys :: nonempty_list(<<_:256>>),
    aad :: binary()
}).

-opaque keyring() :: #keyring{}.
-export_type([keyring/0]).

%% Builds a keyring from a non-empty list of keys using the default
%% associated data.
new(Keys) ->
    new(Keys, ?AAD).

%% Builds a keyring from a non-empty list of keys and explicit associated
%% data. The keys themselves are not inspected.
new([_ | _] = Keys, AAD) ->
    #keyring{keys = Keys, aad = AAD}.

%% Puts a 32-byte key at the front of the keyring.
add(<<_:32/binary>> = Key, KeyRing) ->
    Existing = KeyRing#keyring.keys,
    KeyRing#keyring{keys = [Key | Existing]}.

%% @doc Encrypts the provided plain text using the Advanced Encryption Standard
%% (AES) in Galois/Counter Mode (GCM) using the first 32-octet key of the
%% keyring, the keyring's Associated Authenticated Data (AAD), and a randomly
%% generated Initialization Vector (IV). The resulting payload consists of the
%% 16-octet IV, the 16-octet CipherTag and the cipher text, in that order.
%% @end
-spec encrypt(PlainText, Keyring) -> CipherText when
      PlainText :: iodata(),
      Keyring :: keyring(),
      CipherText :: iodata().

encrypt(PlainText, #keyring{keys = [Key | _], aad = AAD}) ->
    IV = crypto:strong_rand_bytes(16),
    {CipherText, CipherTag} =
        crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, PlainText, AAD, true),
    %% decrypt/2 splits the payload at fixed offsets, so the order and the
    %% 16-octet sizes of IV and tag here must stay in sync with it.
    <<IV/binary, CipherTag/binary, CipherText/binary>>.

%% @doc Verifies the authenticity of the payload and decrypts the ciphertext
%% generated by {@link encrypt/2}. Every key of the keyring is tried in order,
%% so the key used by {@link encrypt/2} must be present in the keyring given
%% here and the AAD must be identical. Payloads shorter than the 32 octets of
%% IV and CipherTag fail verification. Decrypt is not responsible for decoding
%% the underlying Swim protocol message -- see `swim_messages:decode/1'.
%% @end
-spec decrypt(CipherText, KeyRing) -> {ok, PlainText} | {error, failed_verification} when
      CipherText :: binary(),
      KeyRing :: keyring(),
      PlainText :: binary().

decrypt(<<IV:16/binary, CipherTag:16/binary, CipherText/binary>>, Keyring) ->
    #keyring{keys = Keys, aad = AAD} = Keyring,
    decrypt_loop(Keys, AAD, IV, CipherTag, CipherText);
decrypt(_CipherText, _KeyRing) ->
    {error, failed_verification}.

%% Tries each key in turn; the first key that authenticates the payload wins.
decrypt_loop([], _AAD, _IV, _CipherTag, _CipherText) ->
    {error, failed_verification};
decrypt_loop([Key | Keys], AAD, IV, CipherTag, CipherText) ->
    case crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, CipherText, AAD, CipherTag, false) of
        error ->
            decrypt_loop(Keys, AAD, IV, CipherTag, CipherText);
        PlainText ->
            {ok, PlainText}
    end.


--------------------------------------------------------------------------------
/src/swim_membership.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017. All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%% http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

%%% @doc This module is responsible for maintaining the list and status of
%%% non-faulty members in a gossip group through the use of the Suspicion
%%% Mechanism described in the SWIM paper. A `swim_membership' process becomes
%%% aware of membership changes through the exported functions defined for each
%%% member status, {@link alive/3}, {@link suspect/4} and {@link faulty/4}, as
%%% determined by the Failure Detection mechanism of SWIM implemented in
%%% {@link swim_failure}. Member state includes the locally known status of a
%%% member as well as a logical clock for the member's status, known in the
%%% SWIM paper as the incarnation.
%%% When the status of a member changes, events are sent to
%%% {@link swim_broadcasts} to be broadcast to the rest of the members in the
%%% gossip group.
%%%
%%% @end

-module(swim_membership).

-export([new/5]).
-export([local_member/1]).
-export([local_state/1]).
-export([members/1]).
-export([probe_target/1]).
-export([proxies/3]).
-export([size/1]).
-export([refuted/2]).
-export([alive/3]).
-export([suspect/4]).
-export([faulty/4]).
-export([handle_event/2]).

%% Membership state as seen by the local member.
-record(membership, {
    %% The local member and its own incarnation.
    local_member :: swim:member(),
    incarnation = 0 :: swim:incarnation(),
    %% Every other known member with its alive/suspect state; the local
    %% member is not stored here (size/1 adds one for it).
    members = #{} :: #{swim:member() := state()},
    faulty = ordsets:new() :: ordsets:ordset(swim:member()),
    %% Members still to be probed in the current round; refilled with a
    %% random permutation of `members' once it runs empty.
    probe_targets = [] :: [swim:member()],
    %% NOTE(review): alpha, beta, protocol_period and suspicion_factor are
    %% only stored by new/5 in this part of the file; presumably they
    %% parameterise the suspicion timeout - confirm against suspect/4.
    alpha :: pos_integer(),
    beta :: pos_integer(),
    protocol_period :: pos_integer(),
    suspicion_factor :: pos_integer()
}).

%% State of a member believed to be alive.
-record(alive, {
    incarnation = 0 :: swim:incarnation(),
    last_modified :: integer()
}).

%% State of a member under suspicion.
-record(suspect, {
    incarnation :: swim:incarnation(),
    suspecting = ordsets:new() :: ordsets:ordset(swim:member()),
    tref :: reference(),
    last_modified :: integer(),
    min :: float(),
    max :: float(),
    k :: non_neg_integer(),
    timeout :: pos_integer()
}).

-type state() :: alive() | suspect().
-type alive() :: #alive{}.
-type suspect() :: #suspect{}.

-opaque membership() :: #membership{}.
-export_type([membership/0]).

%% Creates a membership that knows only the local member, at incarnation 0.
new(LocalMember, Alpha, Beta, ProtocolPeriod, SuspicionFactor) ->
    #membership{
        local_member = LocalMember,
        alpha = Alpha,
        beta = Beta,
        protocol_period = ProtocolPeriod,
        suspicion_factor = SuspicionFactor
    }.

%% @doc The number of known members in the gossip group, including the local member
-spec size(Membership) -> NumMembers when
      Membership :: membership(),
      NumMembers :: non_neg_integer().

size(#membership{members = Members}) ->
    maps:size(Members) + 1.
%% @doc List every known remote member with its status and incarnation.
%% The local member is not included.
-spec members(Membership) -> Members when
      Membership :: membership(),
      Members :: [{swim:member(), alive | suspect, swim:incarnation()}].

members(#membership{members = Known}) ->
    maps:fold(fun collect_member/3, [], Known).

%% @private Fold step for members/1: prepend one `{Member, Status, Incarnation}'.
collect_member(Member, #alive{incarnation = Inc}, Acc) ->
    [{Member, alive, Inc} | Acc];
collect_member(Member, #suspect{incarnation = Inc}, Acc) ->
    [{Member, suspect, Inc} | Acc].

%% @doc The identifier for the local member
-spec local_member(Membership) -> Member when
      Membership :: membership(),
      Member :: swim:member().

local_member(Membership) ->
    Membership#membership.local_member.

%% @doc Describe the whole local view as a list of membership events.
%%
%% The first event is always the local member's own `alive' event; it is
%% followed by one `alive' or `suspect' event per known member. Suspect events
%% name the local member as the suspecting party.
-spec local_state(Membership) -> Events when
      Membership :: membership(),
      Events :: [swim:membership_event()].

local_state(#membership{local_member = Self, incarnation = SelfInc, members = Known}) ->
    Remote = maps:fold(
               fun(Member, State, Acc) -> [state_event(Member, State, Self) | Acc] end,
               [],
               Known),
    [{membership, {alive, SelfInc, Self}} | Remote].

%% @private Build the membership event describing one member's stored state.
state_event(Member, #suspect{incarnation = Inc}, Self) ->
    {membership, {suspect, Inc, Member, Self}};
state_event(Member, #alive{incarnation = Inc}, _Self) ->
    {membership, {alive, Inc, Member}}.

-spec probe_target(Membership0) -> none | {Target, Membership} when
      Membership0 :: membership(),
      Target :: {swim:member(), swim:incarnation()},
      Membership :: membership().
%% Pick the next member to probe.
%%
%% Targets are taken from the `probe_targets' queue. When the queue is empty it
%% is refilled with all known members in random order; when there are no known
%% members at all the result is `none'. Queue entries that are no longer
%% members (e.g. removed as faulty) are skipped.
probe_target(#membership{probe_targets = [], members = Members})
  when map_size(Members) =:= 0 ->
    none;
probe_target(#membership{probe_targets = [], members = Members} = Membership) ->
    probe_target(Membership#membership{probe_targets = shuffle(maps:keys(Members))});
probe_target(#membership{probe_targets = [T | Targets], members = Members} = Membership0) ->
    Membership = Membership0#membership{probe_targets = Targets},
    case maps:find(T, Members) of
        {ok, #alive{incarnation = Inc}} ->
            {{T, Inc}, Membership};
        {ok, #suspect{incarnation = Inc}} ->
            {{T, Inc}, Membership};
        error ->
            %% Stale queue entry: return the recursive result as-is. Wrapping
            %% it in another tuple (as was done before) produced
            %% `{{Target, Membership}, Membership}' or `{none, Membership}'.
            probe_target(Membership)
    end.

%% @doc Pick up to `Num' random members, excluding `Target', to act as
%% proxies for an indirect probe of `Target'.
-spec proxies(Num, Target, Membership) -> Proxies when
      Num :: pos_integer(),
      Target :: swim:member(),
      Membership :: membership(),
      Proxies :: [swim:member()].

proxies(Num, Target, Membership) ->
    Shuffled = shuffle(maps:keys(Membership#membership.members)),
    lists:sublist([M || M <- Shuffled, M =/= Target], Num).

%% @private Return the elements of `List' ordered by a random key drawn for
%% each element.
shuffle(List) ->
    [Elem || {_, Elem} <- lists:keysort(1, [{rand:uniform(), Elem} || Elem <- List])].

%% @doc Apply a membership event received from another member.
%%
%% Dispatches to alive/3, suspect/4 or faulty/4; any other event is ignored
%% and yields no new events.
-spec handle_event(Event, Membership0) -> {Events, Membership} when
      Event :: swim:membership_event(),
      Membership0 :: membership(),
      Events :: [swim:membership_event()],
      Membership :: membership().

handle_event({membership, {alive, Incarnation, Member}}, Membership) ->
    alive(Member, Incarnation, Membership);
handle_event({membership, {suspect, Incarnation, Member, From}}, Membership) ->
    suspect(Member, Incarnation, From, Membership);
handle_event({membership, {faulty, Incarnation, Member, From}}, Membership) ->
    faulty(Member, Incarnation, From, Membership);
handle_event(_Event, Membership) ->
    {[], Membership}.
%% @doc Set the member status to alive
%%
%% If the member isn't known it's added to the membership, appended to the
%% probe queue and removed from the faulty set, and an alive event is returned
%% so the group can be notified. If the member is currently suspected and the
%% incarnation is greater than the suspected incarnation, the suspicion timer
%% is cancelled, the member becomes alive again and an alive event is returned.
%% If the member is already alive and the incarnation is greater than the known
%% incarnation, only the stored incarnation is updated and no event is
%% returned. If the member is the local member and the incarnation is greater
%% than the current local incarnation we refute the alive message by setting
%% our incarnation to 1 + the received incarnation and returning a new alive
%% event for the group. If none of the above conditions are met we do nothing.
%% @end
-spec alive(Member, Incarnation, Membership0) -> {Events, Membership} when
      Member :: swim:member(),
      Incarnation :: swim:incarnation(),
      Membership0 :: membership(),
      Events :: [swim:membership_event()],
      Membership :: membership().

alive(Member, Incarnation,
      #membership{local_member = Member, incarnation = LocalInc} = Membership) ->
    %% The event is about ourselves: refute only if it is newer than what we
    %% already advertise.
    case Incarnation > LocalInc of
        true -> refute(Incarnation, Membership);
        false -> {[], Membership}
    end;
alive(Member, Incarnation, #membership{members = Known} = Membership) ->
    update_alive(maps:find(Member, Known), Member, Incarnation, Membership).

%% @private Apply an alive event for a remote member given its current entry.
update_alive(error, Member, Incarnation, Membership) ->
    #membership{members = Known, faulty = Faulty, probe_targets = Queue} = Membership,
    Entry = #alive{incarnation = Incarnation,
                   last_modified = swim_time:monotonic_time()},
    {[{membership, {alive, Incarnation, Member}}],
     Membership#membership{
       members = maps:put(Member, Entry, Known),
       probe_targets = Queue ++ [Member],
       faulty = ordsets:del_element(Member, Faulty)}};
update_alive({ok, #suspect{incarnation = KnownInc, tref = TRef}}, Member, Incarnation, Membership)
  when Incarnation > KnownInc ->
    %% A newer incarnation clears the suspicion, so its timer must not fire.
    swim_time:cancel_timer(TRef, [{async, true}, {info, false}]),
    Entry = #alive{incarnation = Incarnation,
                   last_modified = swim_time:monotonic_time()},
    Known = Membership#membership.members,
    {[{membership, {alive, Incarnation, Member}}],
     Membership#membership{members = maps:put(Member, Entry, Known)}};
update_alive({ok, #alive{incarnation = KnownInc} = Entry0}, Member, Incarnation, Membership)
  when Incarnation > KnownInc ->
    Entry = Entry0#alive{incarnation = Incarnation,
                         last_modified = swim_time:monotonic_time()},
    Known = Membership#membership.members,
    {[], Membership#membership{members = maps:put(Member, Entry, Known)}};
update_alive({ok, _}, _Member, _Incarnation, Membership) ->
    {[], Membership}.

%% @doc Set the member status to suspect
%%
%% If the member isn't already known we do nothing. If the member is known
%% we update the status and broadcast the change on the following conditions.
%% If the current status of the member is alive and the incarnation is greater
%% than or equal to the known incarnation of the member, we update the member's
%% status to suspect and broadcast the change. If the current status of the
%% member is suspect and the incarnation is greater than or equal to the known
%% incarnation, we record the additional suspicion, set the known incarnation
%% to the provided incarnation and broadcast the change.
%% If the suspected member is the local member we refute by incrementing our own
%% incarnation and broadcasting the change to the group.
%% @end
-spec suspect(Member, Incarnation, From, Membership0) -> {Events, Membership} when
      Member :: swim:member(),
      Incarnation :: swim:incarnation(),
      From :: local | swim:member(),
      Membership0 :: membership(),
      Events :: [swim:membership_event()],
      Membership :: membership().

suspect(Member, Incarnation, _From, Membership)
  when Member =:= Membership#membership.local_member ->
    refute(Incarnation, Membership);
suspect(Member, Incarnation, local, Membership) ->
    %% `local' is shorthand for "suspected by the local member".
    #membership{local_member = From} = Membership,
    suspect(Member, Incarnation, From, Membership);
suspect(Member, Incarnation, From, Membership) ->
    #membership{members = CurrentMembers, local_member = LocalMember} = Membership,
    case maps:find(Member, CurrentMembers) of
        {ok, #suspect{suspecting = Suspecting, incarnation = CurrentIncarnation, k = K} = Suspect}
          when Incarnation >= CurrentIncarnation ->
            case {ordsets:is_element(From, Suspecting), ordsets:size(Suspecting) < K} of
                {false, true} ->
                    %% A new, independent suspicion: shorten the timeout and
                    %% re-arm the timer with whatever time is left.
                    Remaining = swim_time:cancel_timer(Suspect#suspect.tref),
                    Timeout = remaining_suspicion_time(Remaining, Suspect),
                    TRef = start_timer(Timeout, Member, Incarnation),
                    NewState = Suspect#suspect{
                                 suspecting = ordsets:add_element(From, Suspecting),
                                 incarnation = Incarnation,
                                 tref = TRef,
                                 last_modified = swim_time:monotonic_time(),
                                 timeout = Timeout},
                    NewMembers = maps:put(Member, NewState, CurrentMembers),
                    Events = [{membership, {suspect, Incarnation, Member, From}}],
                    {Events, Membership#membership{members = NewMembers}};
                _ ->
                    %% Already counted this suspector, or enough suspicions
                    %% were collected: only refresh the stored incarnation.
                    %% NOTE(review): the running timer still carries the old
                    %% incarnation in its message — confirm that is intended.
                    NewState = Suspect#suspect{incarnation = Incarnation,
                                               last_modified = swim_time:monotonic_time()},
                    NewMembers = maps:put(Member, NewState, CurrentMembers),
                    {[], Membership#membership{members = NewMembers}}
            end;
        {ok, #alive{incarnation = CurrentIncarnation}}
          when Incarnation >= CurrentIncarnation ->
            {Min, Max, K, Timeout} = initial_suspicion_timeout(Membership),
            TRef = start_timer(Timeout, Member, Incarnation),
            NewState = #suspect{
                          incarnation = Incarnation,
                          suspecting = ordsets:from_list([From]),
                          tref = TRef,
                          last_modified = swim_time:monotonic_time(),
                          min = Min,
                          max = Max,
                          k = K,
                          timeout = Timeout
                         },
            NewMembers = maps:put(Member, NewState, CurrentMembers),
            %% NOTE(review): the event names LocalMember as the suspector even
            %% when `From' is a remote member (the suspect branch above uses
            %% `From') — confirm this is intended.
            Events = [{membership, {suspect, Incarnation, Member, LocalMember}}],
            {Events, Membership#membership{members = NewMembers}};
        _ ->
            {[], Membership}
    end.

%% @private Arm the suspicion timer; the calling process receives
%% `{suspicion_timeout, Member, Incarnation}' after `Timeout'.
start_timer(Timeout, Member, Incarnation) ->
    swim_time:send_after(Timeout, self(), {suspicion_timeout, Member, Incarnation}).

%% @private Time to wait before declaring the member faulty, given one more
%% independent suspicion.
%%
%% `Remaining' is the result of cancelling the running timer. The new total
%% timeout decays logarithmically from `max' towards `min' as suspicions
%% accumulate; the time already spent on the cancelled timer is subtracted.
%% The result is never negative, so it is always safe to pass to
%% start_timer/3.
remaining_suspicion_time(false, _Suspect) ->
    %% presumably swim_time:cancel_timer/1 mirrors erlang:cancel_timer/1 and
    %% yields `false' for a timer that already fired — verify against
    %% swim_time. In that case nothing is left to wait for.
    0;
remaining_suspicion_time(Remaining, Suspect) ->
    #suspect{suspecting = Suspecting, k = K, min = Min, max = Max, timeout = Total} = Suspect,
    Elapsed = Total - Remaining,
    %% `Suspecting' does not yet contain the new suspector, hence the + 1.
    Frac = math:log(ordsets:size(Suspecting) + 1) / math:log(K + 1),
    Timeout = floor(max(Min, Max - (Max - Min) * Frac)),
    %% The shortened timeout may already have been exceeded; clamp so a
    %% negative delay is never handed to the timer.
    max(0, Timeout - Elapsed).

%% @private Compute `{Min, Max, K, Timeout}' for a freshly suspected member.
initial_suspicion_timeout(Membership) ->
    N = maps:size(Membership#membership.members),
    Min = Membership#membership.alpha * max(1, math:log(N)) * Membership#membership.protocol_period,
    Max = Membership#membership.beta * Min,
    % If there aren't enough members in the group excluding ourselves and the suspected member we
    % won't expect any additional suspicions so we immediately set the timeout to Min.
    K = case N < Membership#membership.suspicion_factor - 2 of
            true -> 0;
            false -> Membership#membership.suspicion_factor
        end,
    Timeout = case K < 1 of
                  true -> Min;
                  false -> Max
              end,
    {Min, Max, K, floor(Timeout)}.

%% @doc Remove the member from the group
%%
%% If the member isn't already known, or is known but not currently suspected,
%% we do nothing. If the member is suspected and the provided incarnation is
%% greater than or equal to the suspected incarnation, we cancel the suspicion
%% timer, remove the member, remember it as faulty and broadcast the change.
%% If the member is the local member we refute instead.
%% @end
-spec faulty(Member, Incarnation, From, Membership0) -> {Events, Membership} when
      Member :: swim:member(),
      Incarnation :: swim:incarnation(),
      From :: local | swim:member(),
      Membership0 :: membership(),
      Events :: [swim:membership_event()],
      Membership :: membership().

faulty(Member, Incarnation, _From, Membership)
  when Member =:= Membership#membership.local_member ->
    refute(Incarnation, Membership);
faulty(Member, Incarnation, local, Membership) ->
    #membership{local_member = From} = Membership,
    faulty(Member, Incarnation, From, Membership);
faulty(Member, Incarnation, From, Membership) ->
    #membership{members = CurrentMembers, faulty = Faulty} = Membership,
    case maps:find(Member, CurrentMembers) of
        {ok, #suspect{incarnation = CurrentIncarnation, tref = TRef}}
          when Incarnation >= CurrentIncarnation ->
            %% Drop the pending suspicion timer together with the member, the
            %% same way alive/3 does. Otherwise a stale timeout could later be
            %% applied to the member after it rejoined and was suspected again.
            swim_time:cancel_timer(TRef, [{async, true}, {info, false}]),
            {[{membership, {faulty, Incarnation, Member, From}}],
             Membership#membership{members = maps:remove(Member, CurrentMembers),
                                   faulty = ordsets:add_element(Member, Faulty)}};
        _ ->
            {[], Membership}
    end.

%% @doc Tell whether `Events' contains an alive event about the local member,
%% i.e. whether handling an event made the local member refute it.
-spec refuted(Events, Membership) -> boolean() when
      Events :: [swim:swim_event()],
      Membership :: membership().

refuted(Events, #membership{local_member = LocalMember}) ->
    lists:any(fun({membership, {alive, _Inc, Member}}) -> Member =:= LocalMember;
                 (_Event) -> false
              end, Events).
%% @private
%% Answer a claim about the local member made at `Incarnation'.
%%
%% If the claim is at or above our own incarnation we move past it
%% (`Incarnation + 1') and announce the new incarnation; otherwise we simply
%% re-announce the incarnation we already have and leave the state untouched.
refute(Incarnation, Membership) ->
    #membership{local_member = LocalMember, incarnation = Current} = Membership,
    case Incarnation >= Current of
        true ->
            Next = Incarnation + 1,
            {[{membership, {alive, Next, LocalMember}}],
             Membership#membership{incarnation = Next}};
        false ->
            {[{membership, {alive, Current, LocalMember}}], Membership}
    end.

--------------------------------------------------------------------------------
/src/swim_messages.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017. All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

%%% @doc This module is responsible for encoding and decoding SWIM protocol
%%% messages.
%%%
%%% SWIM protocol message encodings can be found in the documentation
%%% corresponding to the various encoding functions defined in this module.
%%% See {@link encode/1} and {@link encode_event/1}.
%%% All SWIM protocol messages are prefixed with a single octet reflecting
%%% the protocol version of the message. The overall format of SWIM messages is
%%% shown below; the first row gives the size of each field in octets:
%%%
%%% <table border="1">
%%%   <tr><td>1</td><td>1</td><td>N</td></tr>
%%%   <tr><td>Version</td><td>Tag</td><td>Data</td></tr>
%%% </table>
%%% - __*Version*__ : is the protocol version the message is encoded for
%%% - __*Tag*__ : indicates what type of SWIM message Data represents; ACK, NACK,
%%%   PING or PING-REQ
%%% - __*Data*__ : The SWIM messages payload
%%%
%%% All SWIM messages are encrypted over the wire using AES128-GCM. See
%%% {@link swim_keyring:encrypt/2} for more information.
%%% @end
-module(swim_messages).

-export([encode/1]).
-export([decode/1]).
-export([encode_event/1]).
-export([event_size_limit/0]).

%% Protocol version octet that prefixes every message.
-define(HEADER, 1).

%% Octets available for piggybacked events, see event_size_limit/0.
-define(EVENT_SIZE_LIMIT, 452).

-type ack() :: {ack, swim_failure:sequence(), swim:member()}.
-type nack() :: {nack, swim_failure:sequence(), swim:member()}.
-type ping() :: {ping, swim_failure:sequence(), swim:member()}.
-type ping_req() :: {ping_req, swim_failure:sequence(), swim:member()}.
-type swim_message() :: ack() | nack() | ping() | ping_req().

-export_type([swim_message/0]).

%% @doc Event size limit determines the maximum size (in octets) available
%% to piggyback membership and user events on an ACK or PING message.
%%
%% The max message size we use is the minimum reassembly buffer size defined for
%% IPv4 to avoid IP fragmentation -- 576 octets.
%% UDP has an overhead of a 20 octet IP header and an 8 octet
%% UDP header. A PING/ACK/PING-REQ are each 13 octets plus 16 octets for the
%% nonce, and 16 octets for the CipherTag for a minimum size of 84 bytes. That leaves
%% 492 octets for the events.
%% A membership event is 42 octets which equates to a maximum of 11 membership events per
%% ACK/PING/PING-REQ message. A user event can be a maximum of 492 octets.
%%
%% NOTE(review): the function returns 452 while the derivation above arrives
%% at 492 -- confirm which of the two is intended.
%% @end
-spec event_size_limit() -> non_neg_integer().
event_size_limit() ->
    ?EVENT_SIZE_LIMIT.
83 | 84 | encode({{ack, Sequence, Member}, Events}) -> 85 | [?HEADER, $a, <>, encode_member(Member), encode_events(Events)]; 86 | encode({{nack, Sequence, Member}, Events}) -> 87 | [?HEADER, $n, <>, encode_member(Member), encode_events(Events)]; 88 | encode({{ping, Sequence, Member}, Events}) -> 89 | [?HEADER, $p, <>, encode_member(Member), encode_events(Events)]; 90 | encode({{ping_req, Sequence, Member}, Events}) -> 91 | [?HEADER, $r, <>, encode_member(Member), encode_events(Events)]. 92 | 93 | encode_events([]) -> 94 | []; 95 | encode_events(Events) -> 96 | L = length(Events), 97 | [<>, encode_es(Events)]. 98 | 99 | encode_es([]) -> 100 | []; 101 | encode_es([Event | Events]) -> 102 | [encode_event(Event) | encode_es(Events)]. 103 | 104 | %% @doc Encode either a membership event or a user event. 105 | %% 106 | %% A membership event is encoded as follows: 107 | %% 108 | %% 109 | %% 110 | %% 111 | %% 112 | %% 113 | %% 114 | %% 115 | %% 116 | %% 117 | %% 118 | %% 119 | %% 120 | %%
%% <table border="1">
%%   <tr><td>1</td><td>1</td><td>6</td><td>4</td></tr>
%%   <tr><td>50</td><td>Status</td><td>Member</td><td>Incarnation</td></tr>
%% </table>
%% <dl>
%%   <dt>Status</dt>
%%   <dd>is observed status of the Member being broadcast to the group</dd>
%%   <dt>Member</dt>
%%   <dd>is the subject of this membership event</dd>
%%   <dt>Incarnation</dt>
%%   <dd>is the incarnation of the subject Member known by the sender of this
%%   event. See {@link swim_membership} for more information on Incarnations.</dd>
%% </dl>
%%
%% A user event is encoded as follows:
%%
%% <table border="1">
%%   <tr><td>1</td><td>2</td><td>Size</td></tr>
%%   <tr><td>51</td><td>Size</td><td>Erlang Term</td></tr>
%% </table>
144 | %% @end 145 | -spec encode_event(Event) -> iolist() when Event :: swim:swim_event(). 146 | 147 | encode_event({membership, {suspect, Incarnation, Target, From}}) -> 148 | [$m, $s, <>, encode_member(Target), encode_member(From)]; 149 | encode_event({membership, {alive, Incarnation, Target}}) -> 150 | [$m, $a, <>, encode_member(Target)]; 151 | encode_event({membership, {faulty, Incarnation, Target, From}}) -> 152 | [$m, $f, <>, encode_member(Target), encode_member(From)]; 153 | encode_event({user, Bin}) when is_binary(Bin) -> 154 | [$u, <<(byte_size(Bin)):16/integer>>, Bin]. 155 | 156 | %% @doc Encodes a Member as the IP address and port number combination. 157 | %% 158 | %% 159 | %% 160 | %% 161 | %% 162 | %% 163 | %% 164 | %% 165 | %% 166 | %% 167 | %% 168 | %% 169 | %%
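%% <table border="1">
%%   <tr><td>1</td><td>Size</td><td>2</td></tr>
%%   <tr><td>Size</td><td>IP Address</td><td>Port Number</td></tr>
%% </table>
%% <dl>
%%   <dt>IP Address</dt>
%%   <dd>is the IPv4 or IPv6 address the Member can be reached</dd>
%%   <dt>Port Number</dt>
%%   <dd>is the associated Port Number the Member is listening on</dd>
%% </dl>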
176 | %% @end 177 | -spec encode_member(Member) -> binary() when Member :: swim:member(). 178 | 179 | encode_member({{A1, A2, A3, A4}, Port}) -> 180 | <<6, A1:8/integer, A2:8/integer, A3:8/integer, A4:8/integer, Port:16/integer>>; 181 | encode_member({{A1, A2, A3, A4, A5, A6, A7, A8}, Port}) -> 182 | <<18, 183 | A1:16/integer, A2:16/integer, A3:16/integer, A4:16/integer, 184 | A5:16/integer, A6:16/integer, A7:16/integer, A8:16/integer, 185 | Port:16/integer>>. 186 | 187 | %% @doc Decodes the provided message from a binary to an Erlang Term. 188 | %% 189 | %% All messages are prefixed with a single octet to indicate the version of 190 | %% of the protocol. The return value is an Erlang term of the message. If the 191 | %% version is not supported or the message is malformed, an exception is thrown. 192 | %% @end 193 | -spec decode(Packet) -> Result when 194 | Packet :: binary(), 195 | Result :: {swim_message(), [swim:swim_event()]} | no_return(). 196 | 197 | decode(<>) -> 198 | {{ack, Sequence, decode_member(Member)}, decode_events(Events)}; 199 | decode(<>) -> 200 | {{nack, Sequence, decode_member(Member)}, decode_events(Events)}; 201 | decode(<>) -> 202 | {{ping_req, Sequence, decode_member(Member)}, decode_events(Events)}; 203 | decode(<>) -> 204 | {{ping, Sequence, decode_member(Member)}, decode_events(Events)}. 205 | 206 | decode_member(<>) -> 207 | {{A1, A2, A3, A4}, Port}; 208 | decode_member(<>) -> 211 | {{A1, A2, A3, A4, A5, A6, A7, A8}, Port}. 212 | 213 | decode_events(<<>>) -> 214 | []; 215 | decode_events(<>) -> 216 | decode_es(L, Events). 
%% Decode `Count' consecutive events from the front of a binary.
%% Crashes with `function_clause' when the binary does not hold exactly the
%% announced events.
decode_es(0, <<>>) ->
    [];
decode_es(Count, Bin) ->
    {Event, Rest} = decode_e(Bin),
    [Event | decode_es(Count - 1, Rest)].

%% Decode a single event and return it together with the undecoded remainder.
%% Membership events start with `$m' and a status octet (`$s' suspect,
%% `$f' faulty, `$a' alive) followed by a 32 bit incarnation and one or two
%% length-prefixed members; user events start with `$u' and a 16 bit size.
decode_e(<<$m, $s, Inc:32/integer, TLen:8, Target:TLen/binary,
           FLen:8, From:FLen/binary, Rest/binary>>) ->
    {{membership, {suspect, Inc, decode_member(Target), decode_member(From)}}, Rest};
decode_e(<<$m, $f, Inc:32/integer, TLen:8, Target:TLen/binary,
           FLen:8, From:FLen/binary, Rest/binary>>) ->
    {{membership, {faulty, Inc, decode_member(Target), decode_member(From)}}, Rest};
decode_e(<<$m, $a, Inc:32/integer, TLen:8, Target:TLen/binary, Rest/binary>>) ->
    {{membership, {alive, Inc, decode_member(Target)}}, Rest};
decode_e(<<$u, Size:16/integer, Payload:Size/binary, Rest/binary>>) ->
    {{user, Payload}, Rest}.

--------------------------------------------------------------------------------
/src/swim_metrics.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_metrics).
-behavior(gen_event).

-export([start_link/0]).
-export([notify/1]).
-export([subscribe/1]).
-export([unsubscribe/1]).
-export([init/1]).
-export([handle_event/2]).
-export([handle_call/2]).
-export([handle_info/2]).
-export([code_change/3]).
-export([terminate/2]).

%% Start the event manager, registered locally under the module name.
start_link() ->
    gen_event:start_link({local, ?MODULE}).

%% Forward every metrics event to `Pid'.
%% Each subscriber gets its own handler id `{?MODULE, Pid}' so that several
%% subscribers can coexist and be removed individually.
subscribe(Pid) ->
    gen_event:add_handler(?MODULE, {?MODULE, Pid}, [Pid]).

%% Stop forwarding events to `Pid'.
%% Uses the same `{?MODULE, Pid}' id as subscribe/1; with a bare module name
%% the manager would delete whichever handler of this module it found first.
unsubscribe(Pid) ->
    gen_event:delete_handler(?MODULE, {?MODULE, Pid}, [Pid]).

%% Publish `Event' to all subscribers (asynchronous).
notify(Event) ->
    gen_event:notify(?MODULE, Event).

%% gen_event callback: the handler state is the subscriber pid. The subscriber
%% is monitored so the handler can remove itself when the subscriber dies.
init([Subscriber]) ->
    erlang:monitor(process, Subscriber),
    {ok, Subscriber}.

%% gen_event callback: relay the event to the subscriber unchanged.
handle_event(Event, Subscriber) ->
    Subscriber ! Event,
    {ok, Subscriber}.

%% gen_event callback: echoes the request back as the reply.
handle_call(Request, State) ->
    {ok, Request, State}.

%% gen_event callback: drop this handler once its own subscriber is down;
%% every other message (including 'DOWN' for other subscribers) is ignored.
handle_info({'DOWN', _MRef, process, Subscriber, _Reason}, Subscriber) ->
    remove_handler;
handle_info(_Info, State) ->
    {ok, State}.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

terminate(_Reason, _State) ->
    ok.

--------------------------------------------------------------------------------
/src/swim_pushpull.erl:
--------------------------------------------------------------------------------
-module(swim_pushpull).
-behavior(gen_server).

-export([join/2]).
-export([start_link/3]).
-export([accept/4]).

-export([init/1]).
-export([handle_call/3]).
-export([handle_cast/2]).
-export([handle_info/2]).
-export([code_change/3]).
-export([terminate/2]).

%% Listener state.
%% NOTE(review): `socket' is filled with the result of swim_socket:listen/3,
%% which is a `{tcp, Socket}' / `{ssl, Socket}' tuple rather than a bare
%% socket -- confirm and tighten the field type.
-record(state, {
    socket :: inet:socket() | ssl:socket(),
    acceptors :: ets:tab(),
    local_member :: swim:member(),
    %% `map()' is the built-in map type; `maps:map()' is not defined.
    opts :: map()
}).
%% Join a group by exchanging full state with `Member' over a stream socket.
%%
%% Recognised keys in `Opts': `transport' (tcp | ssl, default tcp),
%% `transport_opts' (extra socket options, default []), `retries' (default 5),
%% `connect_timeout' and `retry_timeout' (milliseconds, default 5000 each).
%% Returns `ok' once the remote state has been handed to swim_state, or the
%% error from connecting / receiving. The socket is closed on every path.
join(Member, Opts) ->
    LocalMember = swim_state:local_member(),
    Transport = maps:get(transport, Opts, tcp),
    TransportOpts = [binary, {packet, 4}, {active, false}, {nodelay, true}
                     | maps:get(transport_opts, Opts, [])],
    Retries = maps:get(retries, Opts, 5),
    case connect(Member, Transport, TransportOpts, Opts, Retries) of
        {ok, Socket} ->
            %% Close the socket whether the exchange succeeds, returns an
            %% error (previously leaked on a failed recv) or raises.
            try
                exchange_state(Socket, LocalMember)
            after
                swim_socket:close(Socket)
            end;
        Err ->
            Err
    end.

%% Send our own alive event, wait up to five seconds for the peer's state and
%% merge it.
exchange_state(Socket, LocalMember) ->
    Msg = {push_pull, LocalMember, [{membership, {alive, 0, LocalMember}}]},
    ok = swim_socket:send(Socket, encode(Msg)),
    case swim_socket:recv(Socket, 0, 5000) of
        {ok, Data} ->
            {push_pull, _RemoteMember, RemoteState} = decode(Data),
            merge_state(RemoteState),
            ok;
        Error ->
            Error
    end.

%% Try to connect; on failure retry up to `Retries' more times.
connect({Ip, Port} = Member, Transport, TransportOpts, Opts, Retries) ->
    ConnectTimeout = maps:get(connect_timeout, Opts, 5000),
    case swim_socket:connect(Transport, Ip, Port, TransportOpts, ConnectTimeout) of
        {ok, Socket} ->
            {ok, Socket};
        {error, _Reason} ->
            retry_connect(Member, Transport, TransportOpts, Opts, Retries)
    end.

%% Wait `retry_timeout' milliseconds and connect again, or give up when no
%% retries are left.
retry_connect(_Member, _Transport, _TransportOpts, _Opts, 0) ->
    {error, retry_limit_exceeded};
retry_connect(Member, Transport, TransportOpts, Opts, Retries) ->
    RetryTimeout = maps:get(retry_timeout, Opts, 5000),
    %% Plain timed wait: nothing is posted to (or consumed from) the caller's
    %% mailbox.
    receive
    after RetryTimeout ->
            connect(Member, Transport, TransportOpts, Opts, Retries - 1)
    end.

%% Start the push/pull listener on `IpAddr':`Port'.
start_link(IpAddr, Port, Opts) ->
    gen_server:start_link(?MODULE, [IpAddr, Port, Opts], []).
%% gen_server callback: open the listen socket and start the initial pool of
%% `min_acceptors' (default 2) acceptor processes.
init([IpAddr, Port, Opts]) ->
    %% Acceptors are linked; their exits are handled in handle_info/2, which
    %% only receives 'EXIT' messages when exits are trapped.
    process_flag(trap_exit, true),
    MinAcceptors = maps:get(min_acceptors, Opts, 2),
    TcpOpts = [binary, {packet, 4}, {ip, IpAddr},
               {reuseaddr, true}, {nodelay, true},
               {active, false}],
    {ok, Socket} = swim_socket:listen(tcp, Port, TcpOpts),
    Acceptors = ets:new(acceptor, [private, set]),
    State = #state{local_member = {IpAddr, Port}, socket = Socket,
                   acceptors = Acceptors, opts = Opts},
    lists:foreach(fun(_) -> ok = start_add_acceptor(State) end,
                  lists:seq(1, MinAcceptors)),
    {ok, State}.

%% No synchronous requests are defined; answer instead of leaving the caller
%% blocked until its call times out.
handle_call(_Req, _From, State) ->
    {reply, {error, unknown_request}, State}.

%% An acceptor reports `accepted' as soon as it holds a connection, so a
%% replacement is started to keep listening.
handle_cast(accepted, State) ->
    ok = start_add_acceptor(State),
    {noreply, State};
handle_cast(_Req, State) ->
    {noreply, State}.

%% Running out of file descriptors stops the listener; any other acceptor exit
%% just removes the acceptor from the bookkeeping table.
handle_info({'EXIT', _Pid, {error, emfile}}, State) ->
    {stop, emfile, State};
handle_info({'EXIT', Pid, _Reason}, State) ->
    ok = remove_acceptor(State, Pid),
    {noreply, State};
handle_info(_Info, State) ->
    {noreply, State}.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

terminate(_Reason, _State) ->
    ok.

%% Spawn a linked acceptor on the listen socket and remember its pid.
start_add_acceptor(State) ->
    Args = [self(), State#state.local_member, State#state.socket, State#state.opts],
    Pid = spawn_link(?MODULE, accept, Args),
    ets:insert(State#state.acceptors, {Pid}),
    ok.

%% Forget an acceptor that has exited.
remove_acceptor(State, Pid) ->
    ets:delete(State#state.acceptors, Pid),
    ok.
%% Acceptor process body: wait for one connection, serve it, then exit.
%%
%% Timeouts, aborted connections and TLS alerts just restart the wait; a closed
%% listen socket ends the acceptor normally; any other error terminates it with
%% `{error, Reason}' so the listener can react (see handle_info/2).
accept(Server, LocalMember, ListenSocket, Opts) ->
    AcceptTimeout = maps:get(accept_timeout, Opts, 10000),
    case swim_socket:accept(ListenSocket, Server, AcceptTimeout) of
        {ok, Socket} ->
            %% Always release the connection, even if the message cannot be
            %% decoded.
            try
                read_message(LocalMember, Socket, Opts)
            after
                swim_socket:close(Socket)
            end,
            ok;
        {error, timeout} ->
            accept(Server, LocalMember, ListenSocket, Opts);
        {error, econnaborted} ->
            accept(Server, LocalMember, ListenSocket, Opts);
        {error, {tls_alert, _}} ->
            accept(Server, LocalMember, ListenSocket, Opts);
        {error, closed} ->
            ok;
        {error, Reason} ->
            exit({error, Reason})
    end.

%% Read one message (waiting at most `receive_timeout', default 60000 ms) and
%% handle it.
read_message(LocalMember, Socket, Opts) ->
    case swim_socket:recv(Socket, 0, maps:get(receive_timeout, Opts, 60000)) of
        {ok, Data} ->
            handle_message(decode(Data), LocalMember, Socket);
        {error, Reason} ->
            {error, Reason}
    end.

%% Answer a push/pull request with our local state and merge the peer's state
%% in a separate process; anything else is ignored.
handle_message({push_pull, RemoteMember, RemoteState}, LocalMember, Socket) ->
    LocalState = swim_state:local_state(),
    send_message({push_pull, LocalMember, LocalState}, Socket),
    swim_metrics:notify({push_pull, RemoteMember}),
    spawn_link(fun() -> merge_state(RemoteState) end),
    ok;
handle_message(_Other, _LocalMember, _Socket) ->
    ok.

send_message(Message, Socket) ->
    EncodedMessage = encode(Message),
    swim_socket:send(Socket, EncodedMessage).

%% Data comes straight off the network, so decode in `safe' mode: a payload
%% that would create new atoms or external funs raises `badarg' instead.
decode(Data) ->
    binary_to_term(Data, [safe]).

encode(Data) ->
    term_to_binary(Data).

%% Hand every event of the remote state to swim_state.
merge_state(RemoteState) ->
    lists:foreach(fun swim_state:handle_event/1, RemoteState).

--------------------------------------------------------------------------------
/src/swim_pushpull_sup.erl:
--------------------------------------------------------------------------------
-module(swim_pushpull_sup).
-behavior(supervisor).

-export([start_link/2]).
-export([init/1]).
%% Start the supervisor, registered locally under the module name.
start_link(IpAddr, Port) ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, [IpAddr, Port]).

%% supervisor callback: a single push/pull listener, restarted one_for_one
%% with at most 10 restarts in 10 seconds. The listener gets empty options.
init([IpAddr, Port]) ->
    SupFlags = #{strategy => one_for_one,
                 intensity => 10,
                 period => 10},
    Listener = #{id => pushpull,
                 start => {swim_pushpull, start_link, [IpAddr, Port, #{}]}},
    {ok, {SupFlags, [Listener]}}.

--------------------------------------------------------------------------------
/src/swim_socket.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_socket).

-export([connect/5]).
-export([open/2]).
-export([close/1]).
-export([send/4]).
-export([setopts/2]).
-export([listen/3]).
-export([accept/3]).
-export([recv/3]).
-export([send/2]).
-export([peername/1]).

%% Open a UDP socket on `Port'.
open(Port, Opts) ->
    gen_udp:open(Port, Opts).
%% Connect a stream socket over `tcp' or `ssl'.
%% Stream sockets are returned tagged with their transport, `{tcp, Socket}' or
%% `{ssl, Socket}', which is what the other functions of this module dispatch on.
connect(tcp, IpAddr, Port, Opts, Timeout) ->
    tagged(tcp, gen_tcp:connect(IpAddr, Port, Opts, Timeout));
connect(ssl, IpAddr, Port, Opts, Timeout) ->
    tagged(ssl, ssl:connect(IpAddr, Port, Opts, Timeout)).

%% Close a tagged stream socket or an untagged (UDP) socket.
close({tcp, Socket}) ->
    gen_tcp:close(Socket);
close({ssl, Socket}) ->
    ssl:close(Socket);
close(Socket) ->
    gen_udp:close(Socket).

%% Send a UDP datagram.
send(Socket, DestIp, DestPort, Payload) ->
    gen_udp:send(Socket, DestIp, DestPort, Payload).

%% Set socket options on a tagged stream socket or an untagged socket.
setopts({tcp, Socket}, Opts) ->
    inet:setopts(Socket, Opts);
setopts({ssl, Socket}, Opts) ->
    ssl:setopts(Socket, Opts);
setopts(Socket, Opts) ->
    inet:setopts(Socket, Opts).

%% Open a listening stream socket over `tcp' or `ssl'.
listen(tcp, Port, Opts) ->
    tagged(tcp, gen_tcp:listen(Port, Opts));
listen(ssl, Port, Opts) ->
    tagged(ssl, ssl:listen(Port, Opts)).

%% Accept one connection on a listening socket.
%% `Pid' is sent an `accepted' cast as soon as a connection has been taken off
%% the listen queue (for ssl: before the TLS handshake), so that it can start
%% another acceptor.
accept({tcp, ListenSocket}, Pid, Timeout) ->
    case gen_tcp:accept(ListenSocket, Timeout) of
        {ok, Socket} ->
            gen_server:cast(Pid, accepted),
            {ok, {tcp, Socket}};
        {error, Reason} ->
            {error, Reason}
    end;
accept({ssl, ListenSocket}, Pid, Timeout) ->
    case ssl:transport_accept(ListenSocket, Timeout) of
        {ok, Socket} ->
            gen_server:cast(Pid, accepted),
            %% ssl:handshake/2 returns the established socket as
            %% `{ok, SslSocket}'; it never returns a bare `ok', so the old
            %% `ok' clause made every successful handshake fail with
            %% `case_clause'.
            case ssl:handshake(Socket, Timeout) of
                {ok, SslSocket} ->
                    {ok, {ssl, SslSocket}};
                {error, closed} ->
                    {error, econnaborted};
                {error, Reason} ->
                    {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.

%% Tag a successfully opened socket with its transport; pass errors through.
tagged(Transport, {ok, Socket}) ->
    {ok, {Transport, Socket}};
tagged(_Transport, {error, Reason}) ->
    {error, Reason}.

%% @doc Receives `Size' bytes from a tagged stream socket, waiting at most
%% `Timeout'.
recv({tcp, Sock}, Size, Timeout) ->
    gen_tcp:recv(Sock, Size, Timeout);
recv({ssl, Sock}, Size, Timeout) ->
    ssl:recv(Sock, Size, Timeout).

%% @doc Writes `Data' to a tagged stream socket.
send({tcp, Sock}, Data) ->
    gen_tcp:send(Sock, Data);
send({ssl, Sock}, Data) ->
    ssl:send(Sock, Data).

%% @doc Returns the address and port of the remote end of a tagged stream
%% socket.
peername({tcp, Sock}) ->
    inet:peername(Sock);
peername({ssl, Sock}) ->
    ssl:peername(Sock).

--------------------------------------------------------------------------------
/src/swim_state.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_state).
-behavior(gen_server).

%% Public API.
-export([start_link/4,
         local_member/0,
         local_state/0,
         members/0,
         proxies/1,
         broadcasts/1,
         ack/1,
         probe_timeout/2,
         handle_event/1,
         publish/1]).

%% gen_server callbacks (the remaining ones are exported just below).
-export([init/1,
         handle_call/3,
         handle_cast/2]).
-export([handle_info/2,
         code_change/3,
         terminate/2]).

%% Server state. `current_probe' is the member (and its incarnation) probed in
%% the running protocol period, or `undefined' when no probe is outstanding.
-record(state, {
          protocol_period :: pos_integer(),
          ack_timeout     :: pos_integer(),
          probe_timeout   :: pos_integer(),
          num_proxies     :: pos_integer(),
          current_probe   :: undefined | {swim:member(), swim:incarnation()},
          membership      :: swim_membership:membership(),
          broadcasts      :: swim_broadcasts:broadcasts(),
          awareness       :: swim_awareness:awareness()
         }).

%% @doc Starts the state server, registered locally under the module name.
%% `Opts' must contain `ack_timeout', `probe_timeout', `protocol_period' and
%% `num_proxies'.
start_link(Membership, Broadcasts, Awareness, Opts) ->
    InitArgs = [Membership, Broadcasts, Awareness, Opts],
    gen_server:start_link({local, ?MODULE}, ?MODULE, InitArgs, []).

%%% Synchronous queries.

local_member() ->
    gen_server:call(?MODULE, local_member).

local_state() ->
    gen_server:call(?MODULE, local_state).

members() ->
    gen_server:call(?MODULE, members).

proxies(Target) ->
    gen_server:call(?MODULE, {proxies, Target}).

broadcasts(Target) ->
    gen_server:call(?MODULE, {broadcasts, Target}).

%%% Asynchronous notifications.

publish(Event) ->
    gen_server:cast(?MODULE, {publish, Event}).

ack(Member) ->
    gen_server:cast(?MODULE, {ack, Member}).

probe_timeout(Member, MissedNacks) ->
    gen_server:cast(?MODULE, {probe_timeout, Member, MissedNacks}).

handle_event(Event) ->
    gen_server:cast(?MODULE, {broadcast_event, Event}).

%% @private
%% Reads the mandatory options, then posts `protocol_period' to itself so the
%% first protocol period is handled right after initialisation.
init([Membership, Broadcasts, Awareness, Opts]) ->
    AckTimeout = maps:get(ack_timeout, Opts),
    ProbeTimeout = maps:get(probe_timeout, Opts),
    ProtocolPeriod = maps:get(protocol_period, Opts),
    NumProxies = maps:get(num_proxies, Opts),
    self() ! protocol_period,
    {ok, #state{membership = Membership,
                broadcasts = Broadcasts,
                awareness = Awareness,
                ack_timeout = AckTimeout,
                probe_timeout = ProbeTimeout,
                protocol_period = ProtocolPeriod,
                num_proxies = NumProxies}}.

%% @private
%% Synchronous requests.
%%   local_member         -> the local member as reported by swim_membership
%%   members              -> the current member list
%%   local_state          -> the local membership state
%%   {proxies, Target}    -> up to `num_proxies' proxies for Target
%%   {broadcasts, Target} -> events to send to Target; the broadcast queue is
%%                           pruned against the retransmit limit afterwards
%% Any other request is answered with `{error, unknown_call}' so that the
%% caller does not block until its call timeout expires.
handle_call(local_member, _From, State) ->
    #state{membership = Membership} = State,
    {reply, swim_membership:local_member(Membership), State};
handle_call(members, _From, State) ->
    {reply, swim_membership:members(State#state.membership), State};
handle_call(local_state, _From, State) ->
    {reply, swim_membership:local_state(State#state.membership), State};
handle_call({proxies, Target}, _From, State) ->
    #state{num_proxies = NumProxies, membership = Membership} = State,
    Proxies = swim_membership:proxies(NumProxies, Target, Membership),
    {reply, Proxies, State};
handle_call({broadcasts, Target}, _From, State) ->
    #state{membership = Membership, broadcasts = Broadcasts0} = State,
    {Events, Broadcasts1} = swim_broadcasts:take(Target, Broadcasts0),
    %% The retransmit limit is derived from the current cluster size.
    NumMembers = swim_membership:size(Membership),
    Retransmits = swim_broadcasts:retransmit_limit(NumMembers, Broadcasts1),
    Broadcasts2 = swim_broadcasts:prune(Retransmits, Broadcasts1),
    {reply, Events, State#state{broadcasts = Broadcasts2}};
handle_call(_Msg, _From, State) ->
    {reply, {error, unknown_call}, State}.

%% @private
%% Asynchronous notifications. An ack is only honoured when it comes from the
%% member currently being probed; any other ack falls through to the final
%% clause and is dropped.
handle_cast({ack, Member}, #state{current_probe = {Member, Incarnation}} = State) ->
    {noreply, handle_ack(Member, Incarnation, State)};
handle_cast({probe_timeout, Member, MissedNacks}, State) ->
    {noreply, handle_probe_timeout(Member, MissedNacks, State)};
handle_cast({publish, Event}, #state{broadcasts = Queue} = State) ->
    {noreply, State#state{broadcasts = swim_broadcasts:insert({user, Event}, Queue)}};
handle_cast({broadcast_event, Event}, State) ->
    #state{membership = Membership0, broadcasts = Queue0, awareness = Awareness0} = State,
    {Events, Membership} = swim_membership:handle_event(Event, Membership0),
    Refuted = swim_membership:refuted(Events, Membership),
    Awareness = awareness_after_event(Refuted, Awareness0),
    Queue = swim_broadcasts:insert(Events, Queue0),
    ok = swim_subscriptions:publish(Events),
    {noreply, State#state{membership = Membership,
                          broadcasts = Queue,
                          awareness = Awareness}};
handle_cast(_Msg, State) ->
    {noreply, State}.

%% Records an awareness failure when the handled event led to a refutation
%% (as reported by swim_membership:refuted/2); otherwise awareness is kept.
awareness_after_event(true, Awareness) ->
    swim_awareness:failure(Awareness);
awareness_after_event(false, Awareness) ->
    Awareness.

%% @private
%% `protocol_period' runs one protocol period and schedules the next one.
%% `suspicion_timeout' marks the member faulty through swim_membership and
%% forwards the resulting events to broadcasts and subscribers.
handle_info(protocol_period, State0) ->
    State = handle_protocol_period(State0),
    schedule_next_protocol_period(State),
    {noreply, State};
handle_info({suspicion_timeout, Member, SuspectedAt}, State) ->
    #state{membership = Membership0, broadcasts = Queue0} = State,
    {Events, Membership} =
        swim_membership:faulty(Member, SuspectedAt, local, Membership0),
    Queue = swim_broadcasts:insert(Events, Queue0),
    ok = swim_subscriptions:publish(Events),
    {noreply, State#state{membership = Membership, broadcasts = Queue}};
handle_info(_Info, State) ->
    {noreply, State}.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% @private
terminate(_Reason, _State) ->
    ok.

%% The probe of `Member' timed out. Only acted upon when `Member' is the
%% member currently being probed: it becomes suspected, the resulting events
%% are queued and published, and awareness is charged one failure plus one
%% per missed nack.
handle_probe_timeout(Member, MissedNacks, #state{current_probe = {Member, Incarnation}} = State) ->
    {Events, Membership} =
        swim_membership:suspect(Member, Incarnation, local, State#state.membership),
    Queue = queue_and_publish(Events, State#state.broadcasts),
    Awareness = swim_awareness:failure(MissedNacks + 1, State#state.awareness),
    State#state{current_probe = undefined,
                membership = Membership,
                broadcasts = Queue,
                awareness = Awareness};
handle_probe_timeout(_Member, _MissedNacks, State) ->
    State.

%% The probed member answered: mark it alive, spread the resulting events,
%% record an awareness success and clear the outstanding probe.
handle_ack(Member, Incarnation, State) ->
    {Events, Membership} =
        swim_membership:alive(Member, Incarnation, State#state.membership),
    Queue = queue_and_publish(Events, State#state.broadcasts),
    Awareness = swim_awareness:success(State#state.awareness),
    State#state{membership = Membership,
                broadcasts = Queue,
                awareness = Awareness,
                current_probe = undefined}.

%% Adds `Events' to the broadcast queue and notifies local subscribers.
queue_and_publish(Events, Queue0) ->
    Queue = swim_broadcasts:insert(Events, Queue0),
    ok = swim_subscriptions:publish(Events),
    Queue.

%% Picks the next probe target, if any, and starts probing it with a probe
%% timeout scaled by the current awareness.
handle_protocol_period(#state{membership = Membership0} = State) ->
    case swim_membership:probe_target(Membership0) of
        none ->
            State;
        {{Target, _} = Probe, Membership} ->
            #state{probe_timeout = BaseTimeout,
                   ack_timeout = AckTimeout,
                   awareness = Awareness} = State,
            ProbeTimeout = swim_awareness:scale(BaseTimeout, Awareness),
            ok = swim_failure:probe(Target, AckTimeout, ProbeTimeout),
            State#state{current_probe = Probe, membership = Membership}
    end.

%% Arms the timer for the next protocol period; the period is scaled by the
%% current awareness.
schedule_next_protocol_period(#state{awareness = Awareness, protocol_period = Period}) ->
    Delay = swim_awareness:scale(Period, Awareness),
    swim_time:send_after(Delay, self(), protocol_period).
--------------------------------------------------------------------------------
/src/swim_subscriptions.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_subscriptions).
-behavior(gen_event).

-export([start_link/0]).

-export([subscribe/2]).
-export([unsubscribe/2]).
-export([publish/1]).

-export([init/1]).
-export([handle_event/2]).
-export([handle_call/2]).
-export([handle_info/2]).
-export([terminate/2]).
-export([code_change/3]).

%% State of one installed handler: the subscribed process, the category of
%% events it receives and the monitor reference taken on it in init/1.
-record(state, {
          pid :: pid(),
          event_category :: user | membership,
          mref :: reference()
         }).

%% @doc Starts the event manager, registered locally under the module name.
start_link() ->
    gen_event:start_link({local, ?MODULE}).

%% @doc Subscribes `Pid' to events of `EventCategory'.
%% One handler is installed per subscription, under an id derived from the
%% category and the pid so that it can be addressed individually later.
subscribe(EventCategory, Pid) ->
    Handler = handler_id(EventCategory, Pid),
    gen_event:add_handler(?MODULE, Handler, [EventCategory, Pid]).

%% @doc Removes the subscription of `Pid' to `EventCategory'.
%% The handler is selected by its id. The third argument of
%% gen_event:delete_handler/3 is only handed to terminate/2 and does not
%% select which handler gets deleted, so a bare module id would remove an
%% arbitrary subscription.
unsubscribe(EventCategory, Pid) ->
    Handler = handler_id(EventCategory, Pid),
    gen_event:delete_handler(?MODULE, Handler, [EventCategory, Pid]).

%% Handler id of the form `{Module, Id}' identifying a single subscription.
handler_id(EventCategory, Pid) ->
    {?MODULE, {EventCategory, Pid}}.

%% @doc Publishes one event, or every event of a list, to the subscribers.
%% Only `alive' and `faulty' membership events and user events are forwarded;
%% membership events have their second element dropped before being forwarded.
%% Anything else is ignored.
publish(Events) when is_list(Events) ->
    lists:foreach(fun(Event) -> publish(Event) end, Events);
publish({membership, {alive, _, Member}}) ->
    gen_event:notify(?MODULE, {membership, {alive, Member}});
publish({membership, {faulty, _, Member, T}}) ->
    gen_event:notify(?MODULE, {membership, {faulty, Member, T}});
publish({user, _} = UserEvent) ->
    gen_event:notify(?MODULE, UserEvent);
publish(_) ->
    ok.

%% @private
%% The subscriber is monitored so the handler can remove itself once the
%% subscriber is gone.
init([EventCategory, Pid]) ->
    Monitor = erlang:monitor(process, Pid),
    {ok, #state{pid = Pid, event_category = EventCategory, mref = Monitor}}.

%% @private
%% Forwards events of the subscribed category as `{swim, Data}'.
handle_event({Category, _Event} = Data,
             #state{event_category = Category, pid = Subscriber} = State) ->
    Subscriber ! {swim, Data},
    {ok, State};
handle_event(_Event, State) ->
    {ok, State}.

%% @private
handle_call(_Msg, State) ->
    {ok, ok, State}.

%% @private
%% The monitored subscriber went down: uninstall this handler.
handle_info({'DOWN', Monitor, process, Subscriber, _Reason},
            #state{mref = Monitor, pid = Subscriber}) ->
    remove_handler;
handle_info(_Info, State) ->
    {ok, State}.

%% @private
terminate(_Reason, _State) ->
    ok.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

--------------------------------------------------------------------------------
/src/swim_sup.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_sup).
-behavior(supervisor).

-export([start_link/0]).
-export([init/1]).

%% @doc Starts the top-level supervisor, registered locally under the module
%% name.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% @private
%% Reads the `swim' application environment (each setting has a default),
%% builds the initial membership, broadcast and awareness structures, and
%% returns the child specifications. Children are started in list order under
%% a `rest_for_one' strategy.
init([]) ->
    ListenIP = application:get_env(swim, ip, {127,0,0,1}),
    ListenPort = application:get_env(swim, port, 5000),
    AckTimeout = application:get_env(swim, ack_timeout, 100),
    %% Unless configured, the nack timeout is 80% of the ack timeout.
    NackTimeout = application:get_env(swim, nack_timeout, floor(AckTimeout * 0.8)),
    ProbeTimeout = application:get_env(swim, probe_timeout, 500),
    ProtocolPeriod = application:get_env(swim, protocol_period, 1000),
    NumProxies = application:get_env(swim, num_proxies, 3),
    SuspicionFactor = application:get_env(swim, suspicion_factor, 3),
    AwarenessCount = application:get_env(swim, awareness_count, 8),
    Alpha = application:get_env(swim, alpha, 5),
    Beta = application:get_env(swim, beta, 6),
    Retransmits = application:get_env(swim, retransmit_factor, 3),
    MaxMessageSize = application:get_env(swim, max_message_size, 452),
    LocalMember = {ListenIP, ListenPort},
    Membership = swim_membership:new(LocalMember, Alpha, Beta, ProbeTimeout, SuspicionFactor),
    Broadcasts = swim_broadcasts:new(Retransmits, MaxMessageSize),
    Awareness = swim_awareness:new(AwarenessCount),
    StateOpts = #{
      protocol_period => ProtocolPeriod,
      probe_timeout => ProbeTimeout,
      ack_timeout => AckTimeout,
      num_proxies => NumProxies
     },
    State = #{id => state,
              start => {swim_state, start_link, [Membership, Broadcasts, Awareness, StateOpts]}},
    Keyring = swim_keyring:new(get_key()),
    Failure = #{id => failure,
                start => {swim_failure, start_link,
                          [LocalMember, Keyring, AckTimeout, NackTimeout]}},
    %% swim_pushpull_sup is itself a supervisor; declaring it as one gives it
    %% the supervisor default shutdown (`infinity') instead of the 5 second
    %% worker default, so it can take down its own children in order.
    PushPull = #{id => pushpull,
                 start => {swim_pushpull_sup, start_link, [ListenIP, ListenPort]},
                 type => supervisor},
    Metrics = #{id => metrics,
                start => {swim_metrics, start_link, []}},
    %% swim_subscriptions starts a gen_event manager, whose set of callback
    %% modules is dynamic.
    Subscriptions = #{id => subscriptions,
                      start => {swim_subscriptions, start_link, []},
                      modules => dynamic},
    Flags = #{strategy => rest_for_one,
              intensity => 5,
              period => 900
             },
    {ok, {Flags, [State, Failure, PushPull, Subscriptions, Metrics]}}.

%% Returns the keys for the keyring from the configured key file, which must
%% hold one base64 encoded key. Without a key file a random 32 byte key is
%% generated.
read_key_file({ok, KeyFile}) ->
    {ok, EncodedKey} = file:read_file(KeyFile),
    [base64:decode(EncodedKey)];
read_key_file(undefined) ->
    [crypto:strong_rand_bytes(32)].

%% Returns the list of keys for the keyring. The base64 encoded `key' setting
%% takes precedence over `keyfile'.
get_key() ->
    case application:get_env(swim, key) of
        {ok, Base64Key} ->
            [base64:decode(Base64Key)];
        undefined ->
            read_key_file(application:get_env(swim, keyfile))
    end.

--------------------------------------------------------------------------------
/src/swim_time.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

%%% @copyright 2015-2017
%%% @version {@version}

-module(swim_time).

%% Timers.
-export([send_after/3,
         cancel_timer/1,
         cancel_timer/2]).

%% Clock.
-export([monotonic_time/0]).

%% @doc Delegates to erlang:send_after/3.
send_after(Time, Dest, Msg) ->
    erlang:send_after(Time, Dest, Msg).

%% @doc Delegates to erlang:cancel_timer/1.
cancel_timer(TimerRef) ->
    erlang:cancel_timer(TimerRef).

%% @doc Delegates to erlang:cancel_timer/2.
cancel_timer(TimerRef, Options) ->
    erlang:cancel_timer(TimerRef, Options).

%% @doc Delegates to erlang:monotonic_time/0 (native time unit).
monotonic_time() ->
    erlang:monotonic_time().

--------------------------------------------------------------------------------
/test/property_test/prop_swim_broadcasts.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

-module(prop_swim_broadcasts).

-include_lib("proper/include/proper.hrl").

-compile([export_all]).

-import(swim_generators, [swim_event/0]).

%% Model state.
%%   events - queued events, each paired with the number of times the model
%%            has handed it out so far
%%   pruned - events that have been removed by a prune
%% Both fields hold lists, so they are typed as lists.
-record(state, {
          events = [] :: [{non_neg_integer(), swim:swim_event()}],
          pruned = [] :: [swim:swim_event()]
         }).

%% Generator for the retransmit limit handed to prune.
g_retransmits() ->
    range(1, 10).

initial_state() ->
    #state{events = [], pruned = []}.

%% insert, take and prune are generated with equal weight.
command(_State) ->
    frequency([
               {1, {call, ?MODULE, insert, [swim_event()]}},
               {1, {call, ?MODULE, take, []}},
               {1, {call, ?MODULE, prune, [g_retransmits()]}}
              ]).

%% take and prune are only generated once the model holds at least one event.
precondition(#state{events = []}, {call, ?MODULE, take, _}) ->
    false;
precondition(#state{events = []}, {call, ?MODULE, prune, _}) ->
    false;
precondition(_State, _Call) ->
    true.

%% Model transitions.
%%   insert - the event enters the queue with a count of 0
%%   take   - the first (at most 11) events in sort/2 order get their count
%%            incremented
%%   prune  - events whose count has reached the limit move to `pruned'
next_state(State, _V, {call, ?MODULE, insert, [Event]}) ->
    Events = [{0, Event} | State#state.events],
    State#state{events = lists:sort(fun sort/2, Events)};
next_state(State, _V, {call, ?MODULE, take, []}) ->
    Sorted = lists:sort(fun sort/2, State#state.events),
    N = min(length(Sorted), 11),
    {Taken0, Rest} = lists:split(N, Sorted),
    Taken = [{T + 1, E} || {T, E} <- Taken0],
    State#state{events = lists:sort(fun sort/2, Taken ++ Rest)};
next_state(State, _V, {call, ?MODULE, prune, [Retransmit]}) ->
    BelowLimit = fun({T, _}) -> T < Retransmit end,
    {Keep, Pruned0} = lists:partition(BelowLimit, State#state.events),
    Pruned = lists:foldl(fun({_, E}, Acc) -> [E | Acc] end, State#state.pruned, Pruned0),
    State#state{events = lists:sort(fun sort/2, Keep), pruned = Pruned}.

%% Every event returned by take must be one the model knows about; insert and
%% prune have nothing to check.
postcondition(State, {call, ?MODULE, take, []}, Result) ->
    Known = [E || {_, E} <- State#state.events],
    Unknown = [M || M <- Result, not lists:member(M, Known)],
    Unknown =:= [];
postcondition(_State, {call, ?MODULE, insert, [_Event]}, _Result) ->
    true;
postcondition(_State, {call, ?MODULE, prune, [_Retransmit]}, _Result) ->
    true.

%% Runs each generated command sequence against a freshly started server.
prop_swim_broadcasts() ->
    ?FORALL(Cmds, commands(?MODULE),
            begin
                start_link(),
                {History, ModelState, Outcome} = run_commands(?MODULE, Cmds),
                stop(),
                ?WHENFAIL(
                   print_results(History, ModelState, Outcome),
                   aggregate(command_names(Cmds), Outcome =:= ok))
            end).

print_results(History, ModelState, Outcome) ->
    io:format("History: ~p~nState: ~p~nResult:~p~n", [History, ModelState, Outcome]).

%% Ordering used by the model: membership events sort before user events;
%% otherwise entries are compared with the standard term order.
sort({_, {user, _}}, {_, {membership, _}}) -> false;
sort({_, {membership, _}}, {_, {user, _}}) -> true;
sort(Left, Right) -> Left =< Right.

%%% Commands executed against the server under test.

take() ->
    gen_server:call(?MODULE, take, 500).

insert(Event) ->
    gen_server:call(?MODULE, {insert, Event}, 500).

prune(Retransmits) ->
    gen_server:call(?MODULE, {prune, Retransmits}, 500).

%%% A gen_server whose state is the swim_broadcasts structure under test.

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

stop() ->
    gen_server:stop(?MODULE).

init([]) ->
    {ok, swim_broadcasts:new(3)}.

handle_call(take, _From, Queue0) ->
    {Taken, Queue} = swim_broadcasts:take(Queue0),
    {reply, Taken, Queue};
handle_call({insert, Event}, _From, Queue) ->
    {reply, ok, swim_broadcasts:insert(Event, Queue)};
handle_call({prune, Limit}, _From, Queue) ->
    {reply, ok, swim_broadcasts:prune(Limit, Queue)}.

handle_cast(_Msg, Queue) ->
    {noreply, Queue}.

handle_info(_Info, Queue) ->
    {noreply, Queue}.

code_change(_OldVsn, Queue, _Extra) ->
    {ok, Queue}.

terminate(_Reason, _Queue) ->
    ok.
--------------------------------------------------------------------------------
/test/property_test/prop_swim_keyring.erl:
--------------------------------------------------------------------------------
-module(prop_swim_keyring).

-include_lib("proper/include/proper.hrl").

-export([prop_encryption/0]).

-import(swim_generators, [swim_message/0]).

%% A random 32 byte key.
g_key() ->
    binary(32).

%% Two keyrings built from the same key.
g_symmetric_keys() ->
    ?LET(Key, g_key(), {swim_keyring:new([Key]), swim_keyring:new([Key])}).

%% Two keyrings built from two different keys.
g_asymmetric_keys() ->
    ?LET({Key1, Key2},
         ?SUCHTHAT({K1, K2}, {g_key(), g_key()}, K1 /= K2),
         {swim_keyring:new([Key1]), swim_keyring:new([Key2])}).

g_keypair() ->
    oneof([g_symmetric_keys(), g_asymmetric_keys()]).

%% An encoded swim message, flattened to a binary.
g_encoded_message() ->
    ?LET(Message, swim_message(),
         iolist_to_binary(swim_messages:encode(Message))).

%% @doc A message encrypted with one keyring decrypts with the other exactly
%% when both keyrings are equal; otherwise decryption must fail verification.
%% Results are aggregated under the labels `symmetric' and `asymmetric'.
prop_encryption() ->
    ?FORALL(Keypair, g_keypair(),
            ?FORALL(Message, g_encoded_message(),
                    begin
                        case is_symmetric(Keypair) of
                            true ->
                                aggregate([symmetric],
                                          assert_encryption(Keypair, Message));
                            false ->
                                aggregate([asymmetric],
                                          refute_encryption(Keypair, Message))
                        end
                    end)).

%% True when both keyrings of the pair are equal.
is_symmetric({Key, Key}) ->
    true;
is_symmetric(_) ->
    false.

%% Encrypting with the first keyring and decrypting with the second must
%% return the original message.
assert_encryption({Key1, Key2}, Message) ->
    Encrypted = swim_keyring:encrypt(Message, Key1),
    swim_keyring:decrypt(Encrypted, Key2) =:= {ok, Message}.

%% Decrypting with a different keyring must fail verification.
refute_encryption({Key1, Key2}, Message) ->
    Encrypted = swim_keyring:encrypt(Message, Key1),
    swim_keyring:decrypt(Encrypted, Key2) =:= {error, failed_verification}.
--------------------------------------------------------------------------------
/test/property_test/prop_swim_membership.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017 All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

-module(prop_swim_membership).

-include_lib("proper/include/proper.hrl").

-behavior(proper_statem).

%% The property.
-export([prop_membership/0]).

%% proper_statem callbacks.
-export([command/1,
         initial_state/0,
         next_state/3,
         postcondition/3,
         precondition/2]).

%% Commands run against the system under test.
-export([alive/2,
         suspect/2,
         faulty/2,
         members/0]).

%% gen_server wrapping the membership structure.
-export([start_link/1,
         init/1,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         code_change/3,
         terminate/2]).

-import(swim_generators, [g_member/0, g_incarnation/0]).

%% Model state: the local member, its incarnation, and the other known members
%% with their status and incarnation.
-record(state, {
          me :: swim:member(),
          incarnation = 0 :: swim:incarnation(),
          members = [] :: [{swim:member(), alive | suspect | faulty, swim:incarnation()}]
         }).

%% The local member paired with its current incarnation.
g_local_member(State) ->
    {exactly(State#state.me), exactly(State#state.incarnation)}.

%% A known remote member paired with an incarnation one below, equal to, or
%% one above the incarnation recorded in the model.
g_non_local_member(State) ->
    ?LET(IncFactor, range(-1, 1),
         ?LET({Member, _CurrentStatus, CurrentInc},
              oneof(State#state.members),
              {Member, CurrentInc + IncFactor})).

g_existing_member(State) ->
    oneof([g_local_member(State), g_non_local_member(State)]).

%% A known member with its recorded incarnation, preferring one the model
%% holds as `suspect' (?SUCHTHATMAYBE settles for any member otherwise).
g_existing_suspected_member(State) ->
    ?LET({Member, _CurrentStatus, CurrentInc},
         ?SUCHTHATMAYBE({_Member, CurrentStatus, _CurrentInc},
                        oneof(State#state.members),
                        CurrentStatus =:= suspect),
         {Member, CurrentInc}).

%% The `{Member, Incarnation}' argument of suspect/faulty. The alternatives
%% drawing on known members are only offered when the model has members.
g_suspected_member(State) ->
    frequency([{1, {g_member(State), g_incarnation()}}] ++
                  [{1, g_existing_member(State)} || State#state.members =/= []] ++
                  [{2, g_existing_suspected_member(State)} || State#state.members =/= []]).

%% The origin of a suspicion: a generated member or the atom `local'.
g_suspecting_member(State) ->
    frequency([{5, g_member(State)}, {2, local}]).

g_member(State) ->
    frequency([{1, g_member()}] ++
                  [{3, g_existing_member(State)} || State#state.members =/= []]).

initial_state() ->
    #state{me = {{127,0,0,1},5000}}.

command(State) ->
    oneof([
           {call, ?MODULE, alive, [g_member(State), g_incarnation()]},
           {call, ?MODULE, suspect,
            [g_suspected_member(State), g_suspecting_member(State)]},
           {call, ?MODULE, faulty,
            [g_suspected_member(State), g_suspecting_member(State)]},
           {call, ?MODULE, members, []}
          ]).

%% suspect and faulty need at least one known member, and a member never
%% reports itself. The commands carry two arguments, `{Member, Incarnation}'
%% and the reporting member, so the self-report check matches on that shape.
precondition(#state{members = []}, {call, ?MODULE, suspect, _}) ->
    false;
precondition(#state{members = []}, {call, ?MODULE, faulty, _}) ->
    false;
precondition(_State, {call, ?MODULE, suspect, [{Member, _Inc}, Member]}) ->
    false;
precondition(_State, {call, ?MODULE, faulty, [{Member, _Inc}, Member]}) ->
    false;
precondition(_State, _Call) ->
    true.

%% members/0 must report at least every member held by the model; the other
%% commands only have to return `ok'.
postcondition(State, {call, ?MODULE, members, []}, Members) ->
    Expected = ordsets:from_list(State#state.members),
    Reported = ordsets:from_list(Members),
    ordsets:subtract(Expected, Reported) =:= [];
postcondition(_State, {call, ?MODULE, alive, [_Member, _Inc]}, ok) ->
    true;
postcondition(_State, {call, ?MODULE, suspect, [{_Member, _Inc}, _From]}, ok) ->
    true;
postcondition(_State, {call, ?MODULE, faulty, [{_Member, _Inc}, _From]}, ok) ->
    true.

%% Model transitions. Every command first distinguishes reports about the
%% local member from reports about a remote one.
next_state(State, _V, {call, ?MODULE, members, []}) ->
    State;
next_state(State, _V, {call, ?MODULE, alive, [Member, Incarnation]}) ->
    model_alive(State#state.me =:= Member, Member, Incarnation, State);
next_state(State, _V, {call, ?MODULE, suspect, [{Member, Incarnation}, _From]}) ->
    model_suspect(State#state.me =:= Member, Member, Incarnation, State);
next_state(State, _V, {call, ?MODULE, faulty, [{Member, Incarnation}, _From]}) ->
    model_faulty(State#state.me =:= Member, Member, Incarnation, State).

%% alive: a strictly newer incarnation of the local member bumps the local
%% incarnation past it; a remote member is added when unknown and set to alive
%% when the incarnation is strictly newer than the recorded one.
model_alive(true, _Member, Incarnation, #state{incarnation = Local} = State)
  when Incarnation > Local ->
    State#state{incarnation = Incarnation + 1};
model_alive(true, _Member, _Incarnation, State) ->
    State;
model_alive(false, Member, Incarnation, #state{members = Known} = State) ->
    case lists:keytake(Member, 1, Known) of
        false ->
            State#state{members = [{Member, alive, Incarnation} | Known]};
        {value, {Member, _Status, Current}, Others} when Incarnation > Current ->
            State#state{members = [{Member, alive, Incarnation} | Others]};
        _ ->
            State
    end.

%% suspect: a known remote member becomes suspect when the reported
%% incarnation is at least the recorded one; unknown members are ignored.
model_suspect(true, _Member, Incarnation, State) ->
    refute_local(Incarnation, State);
model_suspect(false, Member, Incarnation, State) ->
    case lists:keytake(Member, 1, State#state.members) of
        false ->
            State;
        {value, {Member, _Status, Current}, Others} when Incarnation >= Current ->
            State#state{members = [{Member, suspect, Incarnation} | Others]};
        _ ->
            State
    end.

%% faulty: only a remote member currently held as suspect is removed, and only
%% when the reported incarnation is at least the recorded one.
model_faulty(true, _Member, Incarnation, State) ->
    refute_local(Incarnation, State);
model_faulty(false, Member, Incarnation, State) ->
    case lists:keytake(Member, 1, State#state.members) of
        false ->
            State;
        {value, {Member, suspect, Current}, Others} when Incarnation >= Current ->
            State#state{members = Others};
        _ ->
            State
    end.

%% A suspect/faulty report about the local member at the current or a newer
%% incarnation bumps the local incarnation past the reported one.
refute_local(Incarnation, #state{incarnation = Local} = State)
  when Incarnation >= Local ->
    State#state{incarnation = Incarnation + 1};
refute_local(_Incarnation, State) ->
    State.

%% Runs each generated command sequence against a freshly started server.
prop_membership() ->
    ?FORALL(Cmds, commands(?MODULE),
            begin
                {ok, _} = start_link({{127,0,0,1}, 5000}),
                {History, ModelState, Outcome} = run_commands(?MODULE, Cmds),
                stop(),
                ?WHENFAIL(
                   io:format("History: ~p~nState: ~p~nResult: ~p~n",
                             [History, ModelState, Outcome]),
                   aggregate(command_names(Cmds), Outcome =:= ok))
            end).

%%% Commands executed against the server under test.

alive(Member, Incarnation) ->
    gen_server:call(?MODULE, {alive, Member, Incarnation}).

suspect({Member, Incarnation}, From) ->
    gen_server:call(?MODULE, {suspect, Member, Incarnation, From}).

faulty({Member, Incarnation}, From) ->
    gen_server:call(?MODULE, {faulty, Member, Incarnation, From}).

members() ->
    gen_server:call(?MODULE, members).

%%% A gen_server whose state is the swim_membership structure under test.

start_link(LocalMember) ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [LocalMember], []).

stop() ->
    gen_server:stop(?MODULE).

init([LocalMember]) ->
    {ok, swim_membership:new(LocalMember, 5, 6, 500, 3)}.

%% Each update is applied to the membership structure held as server state;
%% the events it produces are discarded and the caller just gets `ok'.
handle_call({alive, Member, Incarnation}, _From, Membership) ->
    reply_ok(swim_membership:alive(Member, Incarnation, Membership));
handle_call({suspect, Member, Incarnation, Origin}, _From, Membership) ->
    reply_ok(swim_membership:suspect(Member, Incarnation, Origin, Membership));
handle_call({faulty, Member, Incarnation, Origin}, _From, Membership) ->
    reply_ok(swim_membership:faulty(Member, Incarnation, Origin, Membership));
handle_call(members, _From, Membership) ->
    {reply, swim_membership:members(Membership), Membership}.

%% Keeps the updated membership as the new server state and replies `ok'.
reply_ok({_Events, Membership}) ->
    {reply, ok, Membership}.

handle_cast(_Msg, Membership) ->
    {noreply, Membership}.

handle_info(_Info, Membership) ->
    {noreply, Membership}.

code_change(_OldVsn, Membership, _Extra) ->
    {ok, Membership}.

terminate(_Reason, _Membership) ->
    ok.

--------------------------------------------------------------------------------
/test/property_test/prop_swim_messages.erl:
--------------------------------------------------------------------------------
%%% ----------------------------------------------------------------------------
%%% Copyright (c) 2015-2017. All Rights Reserved.
%%%
%%% Licensed under the Apache License,
%%% Version 2.0 (the "License"); you may not use this file except in compliance
%%% with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%% ----------------------------------------------------------------------------

-module(prop_swim_messages).

-include_lib("proper/include/proper.hrl").

-export([prop_encode_decode/0]).

-import(swim_generators, [swim_message/0]).

%% Round-trip property: encoding a generated message and decoding the
%% resulting binary must give back exactly the same term. Results are
%% aggregated by the tag of the message header (first element of the header
%% tuple).
prop_encode_decode() ->
    ?FORALL({Header, _Events} = Msg, swim_message(),
            begin
                Tag = element(1, Header),
                Encoded = iolist_to_binary(swim_messages:encode(Msg)),
                RoundTrips = Msg =:= swim_messages:decode(Encoded),
                aggregate([Tag], RoundTrips)
            end).

--------------------------------------------------------------------------------
/test/swim_SUITE.erl:
--------------------------------------------------------------------------------
-module(swim_SUITE).

-include_lib("common_test/include/ct.hrl").

%% Common Test callbacks
-export([all/0, init_per_suite/1, end_per_suite/1]).

%% Test cases
-export([prop_swim_broadcasts/1,
         prop_swim_membership/1,
         prop_swim_keyring/1,
         prop_swim_messages/1]).

%% Test cases run by this suite, one per property module.
all() ->
    [prop_swim_membership, prop_swim_messages, prop_swim_keyring, prop_swim_broadcasts].

%% Lets ct_property_test prepare the property-testing tool and extend Config.
init_per_suite(Config) ->
    ct_property_test:init_per_suite(Config).

%% Nothing to tear down.
end_per_suite(_Config) ->
    ok.

%% Each case below builds one property and hands it to
%% ct_property_test:quickcheck/2.

prop_swim_broadcasts(Config) ->
    Property = prop_swim_broadcasts:prop_swim_broadcasts(),
    ct_property_test:quickcheck(Property, Config).

prop_swim_membership(Config) ->
    Property = prop_swim_membership:prop_membership(),
    ct_property_test:quickcheck(Property, Config).

prop_swim_keyring(Config) ->
    Property = prop_swim_keyring:prop_encryption(),
    ct_property_test:quickcheck(Property, Config).

prop_swim_messages(Config) ->
    Property = prop_swim_messages:prop_encode_decode(),
    ct_property_test:quickcheck(Property, Config).

--------------------------------------------------------------------------------
/test/swim_failure_SUITE.erl:
--------------------------------------------------------------------------------
-module(swim_failure_SUITE).

-include_lib("common_test/include/ct.hrl").

-export([all/0]).
-export([groups/0]).
-export([init_per_suite/1]).
-export([end_per_suite/1]).
-export([init_per_group/2]).
-export([end_per_group/2]).

-export([ping/1]).
-export([ping_req/1]).

%% Everything runs inside the `with_client' group.
all() ->
    [{group, with_client}].

%% Both cases share one test client; they run in shuffled order.
groups() ->
    [{with_client, [shuffle, sequence], [ping, ping_req]}].

%% Address the test client binds to.
local_member() ->
    {{127,0,0,1}, 9200}.

%% Address the swim application under test is configured to listen on.
remote_member() ->
    {{127,0,0,1}, 9000}.

%% Configures and starts the swim application with a fresh random 32-byte key
%% (handed to the application base64-encoded, kept raw in Config for the
%% client) and the remote member's port.
%%
%% application:ensure_all_started/1 is used so the suite does not depend on
%% swim's dependency applications having been started by something else;
%% plain application:start/1 returns {error, {not_started, App}} in that case.
init_per_suite(Config) ->
    error_logger:tty(false),
    Key = crypto:strong_rand_bytes(32),
    {_Ip, Port} = RemoteMember = remote_member(),
    ok = application:set_env(swim, port, Port),
    ok = application:set_env(swim, key, base64:encode(Key)),
    {ok, _Started} = application:ensure_all_started(swim),
    [{local_member, local_member()}, {remote_member, RemoteMember}, {key, Key} | Config].

%% Stops the swim application and re-enables error_logger terminal output.
end_per_suite(_Config) ->
    ok = application:stop(swim),
    error_logger:tty(true),
    ok.

%% Starts the UDP test client on the local member address with the shared key
%% and stores its pid under `client'.
init_per_group(with_client, Config) ->
    {ok, Client} = swim_test_client:start(?config(local_member, Config),
                                          ?config(key, Config)),
    [{client, Client} | Config].

%% Stops the test client started by init_per_group/2.
end_per_group(with_client, Config) ->
    Client = ?config(client, Config),
    ok = swim_test_client:stop(Client),
    Config.

%% A ping with sequence 1 sent to the remote member must be answered with an
%% ack carrying the same sequence and the remote member as target.
ping(Config) ->
    Target = ?config(remote_member, Config),
    {ack, 1, Target} = call({ping, 1, Target}, Config),
    ok.

%% A ping_req with sequence 2 sent to the remote member, naming the local
%% member as terminal, must come back as an ack for sequence 2 whose target
%% is the terminal.
ping_req(Config) ->
    Target = ?config(remote_member, Config),
    Terminal = ?config(local_member, Config),
    {ack, 2, Terminal} = call({ping_req, 2, Target, Terminal}, Config),
    ok.
%% Forwards Msg to the test client stored under `client' in Config and returns
%% whatever swim_test_client:call/2 returns.
call(Msg, Config) ->
    swim_test_client:call(?config(client, Config), Msg).

--------------------------------------------------------------------------------
/test/swim_generators.erl:
--------------------------------------------------------------------------------
-module(swim_generators).

-include_lib("proper/include/proper.hrl").

-compile([export_all]).

%% The g_* functions are aliases for the generator of the same name.

g_ip_address() ->
    ip_address().

%% Either a 4-tuple of 0..255 or an 8-tuple of 0..65535.
ip_address() ->
    oneof([
           tuple([range(0, 255) || _ <- lists:seq(1, 4)]),
           tuple([range(0, 65535) || _ <- lists:seq(1, 8)])
          ]).

g_port_number() ->
    port_number().

%% Any integer in 0..65535.
port_number() ->
    range(0, 65535).

g_incarnation() ->
    incarnation().

%% Any integer in 0..2^32-1.
%% range/2 includes both bounds, so the upper bound is (1 bsl 32) - 1; using
%% 1 bsl 32 would also produce 2^32, which needs 33 bits.
%% NOTE(review): assumes incarnations are carried as 32-bit unsigned values --
%% confirm against swim_messages.
incarnation() ->
    range(0, (1 bsl 32) - 1).

g_membership_event() ->
    membership_event().

%% {membership, Event} where Event is a suspect, alive or faulty event.
membership_event() ->
    ?LET(Event,
         oneof([suspect_event(), alive_event(), faulty_event()]),
         {membership, Event}).

g_suspect_event() ->
    suspect_event().

%% {suspect, Incarnation, Member, From}
suspect_event() ->
    ?LET({Incarnation, Member, From},
         {incarnation(), member(), member()},
         {suspect, Incarnation, Member, From}).

g_alive_event() ->
    alive_event().

%% {alive, Incarnation, Member}
alive_event() ->
    ?LET({Incarnation, Member},
         {incarnation(), member()},
         {alive, Incarnation, Member}).

g_faulty_event() ->
    faulty_event().

%% {faulty, Incarnation, Member, From}
faulty_event() ->
    ?LET({Incarnation, Member, From},
         {incarnation(), member(), member()},
         {faulty, Incarnation, Member, From}).

g_user_event() ->
    user_event().

%% {user, Binary} with an arbitrary binary payload.
user_event() ->
    ?LET(Bin, binary(), {user, Bin}).

g_swim_event() ->
    swim_event().

%% Either a user event or a membership event.
swim_event() ->
    oneof([user_event(), membership_event()]).

g_sequence() ->
    sequence().

%% Any integer in 0..2^32-1; the bound is inclusive, see incarnation/0.
%% NOTE(review): assumes sequence numbers are carried as 32-bit unsigned
%% values -- confirm against swim_messages.
sequence() ->
    range(0, (1 bsl 32) - 1).

g_member() ->
    member().
%% {IpAddress, PortNumber}
member() ->
    Address = ip_address(),
    Port = port_number(),
    tuple([Address, Port]).

g_swim_events() ->
    swim_events().

%% A list of swim events whose length follows the current PropEr size.
swim_events() ->
    ?SIZED(Size, swim_events(Size)).

%% Above size 256 the list generator is resized to half the current size
%% (rounded); at or below 256 it is used as is.
swim_events(Size) ->
    Events = list(swim_event()),
    case Size > 256 of
        true -> resize(round(Size / 2), Events);
        false -> Events
    end.

g_ack() ->
    ack().

%% {{ack, Sequence, Target}, Events}
ack() ->
    tagged_message(ack).

g_nack() ->
    nack().

%% {{nack, Sequence, Target}, Events}
nack() ->
    tagged_message(nack).

g_ping() ->
    ping().

%% {{ping, Sequence, Target}, Events}
ping() ->
    tagged_message(ping).

g_ping_req() ->
    ping_req().

%% {{ping_req, Sequence, Target}, Events}
ping_req() ->
    tagged_message(ping_req).

%% Shared shape of the four message generators: a header tuple tagged with
%% Tag, paired with a list of piggybacked events.
tagged_message(Tag) ->
    ?LET({Seq, Target, Events}, {sequence(), member(), swim_events()},
         {{Tag, Seq, Target}, Events}).

g_swim_message() ->
    swim_message().

%% Any one of the four message kinds.
swim_message() ->
    oneof([ack(), ping(), ping_req(), nack()]).

--------------------------------------------------------------------------------
/test/swim_test_client.erl:
--------------------------------------------------------------------------------
-module(swim_test_client).
-behavior(gen_server).

%% API
-export([start/2, stop/1, call/2]).

%% gen_server callbacks
-export([init/1,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         code_change/3,
         terminate/2]).

%% Client state: the address it is bound to, the keyring used for
%% encrypt/decrypt, the UDP socket, and the pending requests.
-record(state, {
          local_member,
          keyring,
          socket,
          requests
         }).

%% Starts an unlinked, unregistered client for LocalMember using Key.
start(LocalMember, Key) ->
    InitArgs = [LocalMember, Key],
    gen_server:start(?MODULE, InitArgs, []).

%% Stops the client and waits for it to terminate.
stop(Client) ->
    gen_server:stop(Client).
%% Sends Msg to the client and waits up to 500 ms for the reply.
%%
%% Returns the reply, or the atom `timeout' when no reply arrives in time.
%% Only the timeout exit raised by gen_server:call/3 is translated; any other
%% failure (for example a dead client) propagates, so it is not misreported
%% as a timeout.
call(Pid, Msg) ->
    try
        gen_server:call(Pid, Msg, 500)
    catch
        exit:{timeout, _} ->
            timeout
    end.

%% gen_server callback: builds a keyring holding Key and opens an active,
%% binary-mode UDP socket on the local member's port. The pending-request map
%% starts empty.
init([{_, Port} = LocalMember, Key]) ->
    Keyring = swim_keyring:new([Key]),
    {ok, Socket} = gen_udp:open(Port, [binary, {active, true}]),
    {ok, #state{requests = #{}, socket = Socket, keyring = Keyring, local_member = LocalMember}}.

%% gen_server callback.
%%
%% {ping, Sequence, Target}: sends a ping for Target to Target itself.
%% {ping_req, Sequence, Via, Terminal}: sends a ping_req naming Terminal to
%% the member Via.
%%
%% Neither clause replies directly; the caller is remembered under Sequence
%% and answered later by handle_info/2. An entry whose reply never arrives
%% stays in the map.
handle_call({ping, Sequence, {_Ip, _Port} = Target}, From, State) ->
    send_request({ping, Sequence, Target}, Sequence, Target, From, State);
handle_call({ping_req, Sequence, {_Ip, _Port} = Via, Terminal}, From, State) ->
    send_request({ping_req, Sequence, Terminal}, Sequence, Via, From, State).

%% Encodes Header with no piggybacked events, encrypts it with the client's
%% keyring, sends it to {Ip, Port}, and records From as the pending caller
%% for Sequence. A failed send crashes the client.
send_request(Header, Sequence, {Ip, Port}, From, State) ->
    #state{socket = Socket, keyring = Keyring, requests = Requests} = State,
    Msg = swim_messages:encode({Header, []}),
    Payload = swim_keyring:encrypt(Msg, Keyring),
    ok = gen_udp:send(Socket, Ip, Port, Payload),
    {noreply, State#state{requests = Requests#{Sequence => From}}}.

%% Casts are not part of the client protocol; they are dropped.
handle_cast(_Req, State) ->
    {noreply, State}.
%% gen_server callback for incoming UDP datagrams.
%%
%% Packets that fail verification are dropped. Verified packets are decoded
%% and dispatched by handle_plaintext/3. Any non-UDP message is ignored.
handle_info({udp, _Socket, Ip, InPortNo, Packet}, State) ->
    case swim_keyring:decrypt(Packet, State#state.keyring) of
        {ok, PlainText} ->
            handle_plaintext(PlainText, {Ip, InPortNo}, State);
        {error, failed_verification} ->
            {noreply, State}
    end;
handle_info(_Info, State) ->
    {noreply, State}.

%% Decodes a verified payload and dispatches the message header.
%%
%% Only the decode call is protected: the payload arrives from the network,
%% so a payload that cannot be decoded (or decodes to an unexpected shape) is
%% dropped. Failures while acting on a decoded message are not caught here
%% and crash the client, as they already do in handle_call/3.
handle_plaintext(PlainText, Sender, State) ->
    try swim_messages:decode(PlainText) of
        {Message, _Events} ->
            handle_message(Message, Sender, State);
        _Unexpected ->
            {noreply, State}
    catch
        _:_ ->
            {noreply, State}
    end.

%% Acts on one decoded message header.
%%
%% ack:  replies to the caller waiting on that sequence number, if any, and
%%       forgets the pending request; unknown sequences are ignored.
%% ping: answers the sender with an ack for the same sequence and target.
%% Any other header is ignored.
handle_message({ack, Sequence, _Target} = Reply, _Sender, State) ->
    case maps:take(Sequence, State#state.requests) of
        {From, Requests} ->
            gen_server:reply(From, Reply),
            {noreply, State#state{requests = Requests}};
        error ->
            {noreply, State}
    end;
handle_message({ping, Sequence, Target}, {Ip, InPortNo}, State) ->
    Ack = swim_messages:encode({{ack, Sequence, Target}, []}),
    Payload = swim_keyring:encrypt(Ack, State#state.keyring),
    ok = gen_udp:send(State#state.socket, Ip, InPortNo, Payload),
    {noreply, State};
handle_message(_Other, _Sender, State) ->
    {noreply, State}.

%% No state migration is performed on code upgrade.
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Nothing is released explicitly on shutdown.
terminate(_Reason, _State) ->
    ok.

--------------------------------------------------------------------------------