├── .gitignore
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── doc
├── README.md
├── edoc-info
├── erlang.png
├── overview.edoc
├── stylesheet.css
├── swim.md
├── swim_broadcasts.md
├── swim_membership.md
├── swim_messages.md
├── swim_subscriptions.md
└── swim_transport.md
├── rebar.config
├── rebar.lock
├── src
├── swim.app.src
├── swim.erl
├── swim_app.erl
├── swim_awareness.erl
├── swim_broadcasts.erl
├── swim_failure.erl
├── swim_keyring.erl
├── swim_membership.erl
├── swim_messages.erl
├── swim_metrics.erl
├── swim_pushpull.erl
├── swim_pushpull_sup.erl
├── swim_socket.erl
├── swim_state.erl
├── swim_subscriptions.erl
├── swim_sup.erl
└── swim_time.erl
└── test
├── property_test
├── prop_swim_broadcasts.erl
├── prop_swim_keyring.erl
├── prop_swim_membership.erl
└── prop_swim_messages.erl
├── swim_SUITE.erl
├── swim_failure_SUITE.erl
├── swim_generators.erl
└── swim_test_client.erl
/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | ttb_last_config
3 | log/
4 | *~
5 | \#*\#
6 | .\#*
7 | *.beam
8 | TAGS
9 | .DS_Store
10 | .rebar3
11 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: erlang
2 | otp_release:
3 | - 20
4 | - 19
5 | script: make test
6 | cache:
7 | directories:
8 | - ~/.cache/rebar3/
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | REBAR ?= $(shell which rebar3)
2 |
3 | .PHONY: test
4 |
5 | compile:
6 | $(REBAR) do xref, dialyzer
7 |
8 | test:
9 | $(REBAR) ct
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SWIM - An Awesome Weakly-consistent Infection-style Gossip Protocol #
2 |
3 | Copyright (c) 2015-2018 Tucker Barbour
4 |
5 | __Authors:__ Tucker Barbour ([`tucker.barbour@gmail.com`](mailto:tucker.barbour@gmail.com)).
6 |
7 | __References__* http://www.cs.cornell.edu/~asdas/research/dsn02-SWIM.pdf
8 |
9 | (__WARNING:__ This project is untested in production environments. Do not use in production.)
10 |
11 | ### Intro
12 |
13 | This Application is an Erlang implementation of the
14 | Scalable Weakly-consistent Infection-style Process Group
15 | Membership Protocol (SWIM). As the title implies, SWIM provides
16 | weakly-consistent knowledge of process group membership information to all
17 | participating processes. However, the [*Scalable* part of the title should read:
18 | *Awesome!*](http://erlangcentral.org/scalable-is-awesome-literally-garrett-smith-erlang-user-conference-2015/#.VZWtcXjEo22)
19 | So let's be more specific about what Awesome features SWIM provides:
20 |
21 | - Constant message load (bandwidth) per member regardless of the number
22 | of members in the group
23 | - Constant time to first-detection of a faulty process regardless of
24 | the number of members in the group
25 | - Low false-positive failure detection rate
26 |
27 | ### Project Status
28 |
29 | This project is still under active development and as such the API may change without warning.
30 |
31 | ### Use Cases
32 |
33 | What can we use SWIM for?
34 |
35 | - Reliable multicast
36 | - Epidemic-style information dissemination
37 | - Pub-sub
38 | - Generic peer-to-peer systems
39 |
40 | Really anything that requires each process in a group to maintain a local list
41 | of other non-faulty processes in the group and be notified when members join or
42 | leave, either voluntarily or through failure.
43 |
44 | ### Why?
45 |
46 | Other distributed membership algorithms traditionally use a heartbeating technique.
47 | The heartbeat technique calls for each process in the group to periodically
48 | send an incrementing heartbeat counter to all other processes in the group as well
49 | as respond to incoming heartbeats from other processes. A process is detected as
50 | faulty when a heartbeat response is not received from a process in some
51 | period of time. Heartbeat implementations often suffer from scalability limitations
52 | as the size of the process group grows. Some popular heartbeat architectures
53 | along with potential weaknesses:
54 |
55 | * Centralized - leads to hot-spots and single-point-of-failure
56 | * All-to-All - leads to message load on the network that grows quadratically with the group size
57 | * Logical Ring - unpredictability of failure detection time
58 |
59 | SWIM addresses the problems with traditional heartbeat implementations through a
60 | peer-to-peer randomized probing protocol. I recommend reading the SWIM paper
61 | for more details.
62 |
63 | ### Why Not?
64 |
65 | SWIM provides weak-consistency guarantees of group membership.
66 | If our domain requires stronger consistency of membership awareness then we
67 | should look elsewhere:
68 |
69 | - [Zookeeper](https://zookeeper.apache.org)
70 | - [Paxos](http://research.microsoft.com/en-us/um/people/lamport/pubs/paxos-simple.pdf)
71 | - [Raft](https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro)
72 | - [Riak Ensemble](https://github.com/basho/riak_ensemble)
73 |
74 | What if we want more than just membership awareness and fault detection? Say
75 | we want application-level sharding like a consistent-hash ring?
76 | SWIM and this implementation only provide weakly-consistent membership awareness.
77 | You can use SWIM as the underlying gossip protocol to disseminate
78 | ring updates to the group -- but that's up to you and your application. You may
79 | be better off taking a look at other, more specific, implementations like:
80 |
81 | - [Ringpop](https://github.com/uber/ringpop)
82 | - [Plumtree](https://github.com/helium/plumtree)
83 |
84 | What if the information we need to disseminate to the group is large, on
85 | the order of MiB and GiB? This implementation of SWIM uses UDP for
86 | transport and thus has an upper limit on the size of information we can
87 | reliably send per message. Again, we can use SWIM for membership awareness and
88 | write our application logic using TCP to transmit our large data between members.
89 | It might also be worth taking a look at alternative implementations that have
90 | modified the protocol to support both UDP and TCP:
91 |
92 | - [Memberlist](https://github.com/hashicorp/memberlist)
93 |
94 | ### Lifeguard
95 |
96 | We've also included some of the improvements outlined in the [Lifeguard](https://arxiv.org/abs/1707.00788) paper from Hashicorp. You can also find more information about their research on [their website](https://www.hashicorp.com/blog/making-gossip-more-robust-with-lifeguard). On a local 5-node cluster, we have observed a reduction in false positive rates during the threshold experiment. More details of the results will be provided when we have time to conduct a more scientific experiment with this implementation.
97 |
98 | ### Build
99 |
100 | We require OTP-19.x and an OpenSSL that supports AES-GCM. The default on OSX
101 | does not include support for AES-GCM, so it's recommended you use `homebrew` to
102 | install a newer version of OpenSSL and compile OTP linking to the OpenSSL managed
103 | by `homebrew`. Include `--with-ssl=/usr/local/opt/openssl` when compiling OTP.
104 |
105 |
106 |
--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # SWIM - An Awesome Weakly-consistent Infection-style Gossip Protocol #
4 |
5 | Copyright (c) 2015 Tucker Barbour
6 |
7 | __Version:__ Feb 18 2016 10:13:03
8 |
9 | __Authors:__ Tucker Barbour ([`barbct5@gmail.com`](mailto:barbct5@gmail.com)).
10 |
11 | __References__* http://www.cs.cornell.edu/~asdas/research/dsn02-SWIM.pdf
12 |
13 | [Build Status](https://travis-ci.org/barbct5/swim)
14 |
15 | (__WARNING:__ This project is untested in production environments. Do not use in production.)
16 |
17 | ### Intro
18 |
19 | This Application is an Erlang implementation of the
20 | Scalable Weakly-consistent Infection-style Process Group
21 | Membership Protocol (SWIM). As the title implies, SWIM provides
22 | weakly-consistent knowledge of process group membership information to all
23 | participating processes. However, the [*Scalable* part of the title should read:
24 | *Awesome!*](http://erlangcentral.org/scalable-is-awesome-literally-garrett-smith-erlang-user-conference-2015/#.VZWtcXjEo22)
25 | So let's be more specific about what Awesome features SWIM provides:
26 |
27 | - Constant message load (bandwidth) per member regardless of the number
28 | of members in the group
29 | - Constant time to first-detection of a faulty process regardless of
30 | the number of members in the group
31 | - Low false-positive failure detection rate
32 |
33 | ### Use Cases
34 |
35 | What can we use SWIM for?
36 |
37 | - Reliable multicast
38 | - Epidemic-style information dissemination
39 | - Pub-sub
40 | - Generic peer-to-peer systems
41 |
42 | Really anything that requires each process in a group to maintain a local list
43 | of other non-faulty processes in the group and be notified when members join or
44 | leave, either voluntarily or through failure.
45 |
46 | ### Why?
47 |
48 | Other distributed membership algorithms traditionally use a heartbeating technique.
49 | The heartbeat technique calls for each process in the group to periodically
50 | send an incrementing heartbeat counter to all other processes in the group as well
51 | as respond to incoming heartbeats from other processes. A process is detected as
52 | faulty when a heartbeat response is not received from a process in some
53 | period of time. Heartbeat implementations often suffer from scalability limitations
54 | as the size of the process group grows. Some popular heartbeat architectures
55 | along with potential weaknesses:
56 |
57 | * Centralized - leads to hot-spots and single-point-of-failure
58 | * All-to-All - leads to message load on the network that grows quadratically with the group size
59 | * Logical Ring - unpredictability of failure detection time
60 |
61 | SWIM addresses the problems with traditional heartbeat implementations through a
62 | peer-to-peer randomized probing protocol. I recommend reading the SWIM paper
63 | for more details.
64 |
65 | ### Why Not?
66 |
67 | SWIM provides weak-consistency guarantees of group membership.
68 | If our domain requires stronger consistency of membership awareness then we
69 | should look elsewhere:
70 |
71 | - [Zookeeper](https://zookeeper.apache.org)
72 | - [Paxos](http://research.microsoft.com/en-us/um/people/lamport/pubs/paxos-simple.pdf)
73 | - [Raft](https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro)
74 | - [Riak Ensemble](https://github.com/basho/riak_ensemble)
75 |
76 | What if we want more than just membership awareness and fault detection? Say
77 | we want application-level sharding like a consistent-hash ring?
78 | SWIM and this implementation only provide weakly-consistent membership awareness.
79 | You can use SWIM as the underlying gossip protocol to disseminate
80 | ring updates to the group -- but that's up to you and your application. You may
81 | be better off taking a look at other, more specific, implementations like:
82 |
83 | - [Ringpop](https://github.com/uber/ringpop)
84 | - [Plumtree](https://github.com/helium/plumtree)
85 |
86 | What if the information we need to disseminate to the group is large, on
87 | the order of MiB and GiB? This implementation of SWIM uses UDP for
88 | transport and thus has an upper limit on the size of information we can
89 | reliably send per message. Again, we can use SWIM for membership awareness and
90 | write our application logic using TCP to transmit our large data between members.
91 | It might also be worth taking a look at alternative implementations that have
92 | modified the protocol to support both UDP and TCP:
93 |
94 | - [Memberlist](https://github.com/hashicorp/memberlist)
95 |
96 | ### Details
97 |
98 | If you made it this far and are still interested, you should read the module
99 | documentation which includes details about the implementation.
100 | The pieces of the SWIM protocol are broken down as follows:
101 |
102 | * __*Failure Detection*__ - [`swim`](swim.md)
103 | * __*Membership*__ - [`swim_membership`](swim_membership.md)
104 | * __*Dissemination*__ - [`swim_broadcasts`](swim_broadcasts.md)
105 |
106 | ### How To
107 |
108 | Here is a quick reference for using SWIM in your application. These examples
109 | assume the `crypto` application is already started. We also assume encryption
110 | keys have already been distributed -- that's outside the scope of SWIM.
111 |
112 | ```erlang
113 |
114 | % On our first node, let us start the seed listening at 192.168.2.10:5000
115 | {ok, Lan} = swim:start_link(lan, {{192,168,2,10}, 5000}, Keys, []).
116 |
117 | % On a different node, let us join the group lan using 192.168.2.10:5000
118 | % as the seed.
119 | Seeds = [{{192,168,2,10}, 5000}].
120 | {ok, Lan} = swim:start_link(lan, {{192,168,2,11}, 5000}, Keys, [{seeds, Seeds}]).
121 |
122 | % Let us check who else is in the group
123 | swim:members(lan).
124 |
125 | % By default the parent process will receive messages when group membership
126 | % changes. User-provided messages, sent via swim:publish/2, are sent to the
127 | % parent process as well. You can match against the different types of messages
128 | % and membership changes as seen below:
129 |
130 | receive
131 | {swim, {membership, {alive, Member, _Incarnation}}} ->
132 | ok = error_logger:info_msg("Member is alive: ~p", [Member]);
133 | {swim, {membership, {faulty, Member, _Incarnation}}} ->
134 | ok = error_logger:info_msg("Member is dead: ~p", [Member]);
135 | {swim, {user, Msg}} ->
136 | ok = error_logger:info_msg("Received user message: ~p", [Msg])
137 | after
138 | 5000 ->
139 | timeout
140 | end.
141 |
142 | % Other non-parent processes can subscribe to receive messages about group
143 | % membership changes as well as user-provided messages.
144 | % Let us subscribe to these events:
145 |
146 | swim:subscribe(lan).
147 |
148 | ```
149 |
150 | ### Build
151 |
152 | We require OTP-18.x and an OpenSSL that supports AES-GCM. The default on OSX
153 | does not include support for AES-GCM, so it's recommended you use `homebrew` to
154 | install a newer version of OpenSSL and compile OTP linking to the OpenSSL managed
155 | by `homebrew`. Include `--with-ssl=/usr/local/opt/openssl` when compiling OTP.
156 |
157 | We use `rebar3`, included in the source of this repo, to build and test `swim`.
158 |
159 | ```
160 |
161 | ./rebar3 do xref, dialyzer, eunit
162 |
163 | ```
164 |
165 | ## Modules ##
166 |
167 |
168 |
174 |
175 |
176 |
--------------------------------------------------------------------------------
/doc/edoc-info:
--------------------------------------------------------------------------------
1 | %% encoding: UTF-8
2 | {application,swim}.
3 | {modules,[swim,swim_broadcasts,swim_membership,swim_messages,
4 | swim_subscriptions,swim_transport]}.
5 |
--------------------------------------------------------------------------------
/doc/erlang.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ctbarbour/swim/ca3fe4f45408c4e95e6b8e00662e443ba710737c/doc/erlang.png
--------------------------------------------------------------------------------
/doc/overview.edoc:
--------------------------------------------------------------------------------
1 | @author Tucker Barbour
2 | @copyright 2015-2018 Tucker Barbour
3 | @version {@version}
4 | @title SWIM - An Awesome Weakly-consistent Infection-style Gossip Protocol
5 | @reference http://www.cs.cornell.edu/~asdas/research/dsn02-SWIM.pdf
6 | @doc
7 |
8 | [Build Status](https://travis-ci.org/barbct5/swim)
9 |
10 |
11 | (__WARNING:__ This project is untested in production environments. Do not use in production.)
12 |
13 |
14 | ### Intro
15 |
16 | This Application is an Erlang implementation of the
17 | Scalable Weakly-consistent Infection-style Process Group
18 | Membership Protocol (SWIM). As the title implies, SWIM provides
19 | weakly-consistent knowledge of process group membership information to all
20 | participating processes. However, the [*Scalable* part of the title should read:
21 | *Awesome!*](http://erlangcentral.org/scalable-is-awesome-literally-garrett-smith-erlang-user-conference-2015/#.VZWtcXjEo22)
22 | So let's be more specific about what Awesome features SWIM provides:
23 |
24 | - Constant message load (bandwidth) per member regardless of the number
25 | of members in the group
26 | - Constant time to first-detection of a faulty process regardless of
27 | the number of members in the group
28 | - Low false-positive failure detection rate
29 |
30 | ### Use Cases
31 |
32 | What can we use SWIM for?
33 |
34 | - Reliable multicast
35 | - Epidemic-style information dissemination
36 | - Pub-sub
37 | - Generic peer-to-peer systems
38 |
39 | Really anything that requires each process in a group to maintain a local list
40 | of other non-faulty processes in the group and be notified when members join or
41 | leave, either voluntarily or through failure.
42 |
43 | ### Why?
44 |
45 | Other distributed membership algorithms traditionally use a heartbeating technique.
46 | The heartbeat technique calls for each process in the group to periodically
47 | send an incrementing heartbeat counter to all other processes in the group as well
48 | as respond to incoming heartbeats from other process. A process is detected as
49 | faulty when a heartbeat response is not received from a process in some
50 | period of time. Heartbeat implementations often suffer from scalability limitations
51 | as the size of the process group grows. Some popular heartbeat architectures
52 | along with potential weaknesses:
53 |
54 | * Centralized - leads to hot-spots and single-point-of-failure
55 | * All-to-All - leads to message load on the network that grows quadratically with the group size
56 | * Logical Ring - unpredictability of failure detection time
57 |
58 | SWIM addresses the problems with traditional heartbeat implementations through a
59 | peer-to-peer randomized probing protocol. I recommend reading the SWIM paper
60 | for more details.
61 |
62 | ### Why Not?
63 |
64 | SWIM provides weak-consistency guarantees of group membership.
65 | If our domain requires stronger consistency of membership awareness then we
66 | should look elsewhere:
67 |
68 | - [Zookeeper](https://zookeeper.apache.org)
69 | - [Paxos](http://research.microsoft.com/en-us/um/people/lamport/pubs/paxos-simple.pdf)
70 | - [Raft](https://www.usenix.org/conference/atc14/technical-sessions/presentation/ongaro)
71 | - [Riak Ensemble](https://github.com/basho/riak_ensemble)
72 |
73 | What if we want more than just membership awareness and fault detection? Say
74 | we want application-level sharding like a consistent-hash ring?
75 | SWIM and this implementation only provide weakly-consistent membership awareness.
76 | You can use SWIM as the underlying gossip protocol to disseminate
77 | ring updates to the group -- but that's up to you and your application. You may
78 | be better off taking a look at other, more specific, implementations like:
79 |
80 | - [Ringpop](https://github.com/uber/ringpop)
81 | - [Plumtree](https://github.com/helium/plumtree)
82 |
83 | What if the information we need to disseminate to the group is large, on
84 | the order of MiB and GiB? This implementation of SWIM uses UDP for
85 | transport and thus has an upper limit on the size of information we can
86 | reliably send per message. Again, we can use SWIM for membership awareness and
87 | write our application logic using TCP to transmit our large data between members.
88 | It might also be worth taking a look at alternative implementations that have
89 | modified the protocol to support both UDP and TCP:
90 |
91 | - [Memberlist](https://github.com/hashicorp/memberlist)
92 |
93 | ### Details
94 |
95 | If you made it this far and are still interested, you should read the module
96 | documentation which includes details about the implementation.
97 | The pieces of the SWIM protocol are broken down as follows:
98 |
99 | * __*Failure Detection*__ - {@link swim}
100 | * __*Membership*__ - {@link swim_membership}
101 | * __*Dissemination*__ - {@link swim_broadcasts}
102 |
103 | ### How To
104 |
105 | Here is a quick reference for using SWIM in your application. These examples
106 | assume the `crypto` application is already started. We also assume encryption
107 | keys have already been distributed -- that's outside the scope of SWIM.
108 |
109 |
110 | % On our first node, let us start the seed listening at 192.168.2.10:5000
111 | {ok, Lan} = swim:start_link(lan, {{192,168,2,10}, 5000}, Keys, []).
112 |
113 | % On a different node, let us join the group `lan' using 192.168.2.10:5000
114 | % as the seed.
115 | Seeds = [{{192,168,2,10}, 5000}].
116 | {ok, Lan} = swim:start_link(lan, {{192,168,2,11}, 5000}, Keys, [{seeds, Seeds}]).
117 |
118 | % Let us check who else is in the group
119 | swim:members(lan).
120 |
121 | % By default the parent process will receive messages when group membership
122 | % changes. User-provided messages, sent via `swim:publish/2', are sent to the
123 | % parent process as well. You can match against the different types of messages
124 | % and membership changes as seen below:
125 |
126 | receive
127 | {swim, {membership, {alive, Member, _Incarnation}}} ->
128 | ok = error_logger:info_msg("Member is alive: ~p", [Member]);
129 | {swim, {membership, {faulty, Member, _Incarnation}}} ->
130 | ok = error_logger:info_msg("Member is dead: ~p", [Member]);
131 | {swim, {user, Msg}} ->
132 | ok = error_logger:info_msg("Received user message: ~p", [Msg])
133 | after
134 | 5000 ->
135 | timeout
136 | end.
137 |
138 | % Other non-parent processes can subscribe to receive messages about group
139 | % membership changes as well as user-provided messages.
140 | % Let us subscribe to these events:
141 |
142 | swim:subscribe(lan).
143 |
144 |
145 | ### Build
146 |
147 | We require OTP-18.x and an OpenSSL that supports AES-GCM. The default on OSX
148 | does not include support for AES-GCM, so it's recommended you use `homebrew' to
149 | install a newer version of OpenSSL and compile OTP linking to the OpenSSL managed
150 | by `homebrew'. Include `--with-ssl=/usr/local/opt/openssl' when compiling OTP.
151 |
152 | We use `rebar3', included in the source of this repo, to build and test `swim'.
153 |
154 |
155 | ./rebar3 do xref, dialyzer, eunit
156 |
157 |
158 | ## Modules ##
159 |
160 |
161 |
167 |
168 | @end
169 |
--------------------------------------------------------------------------------
/doc/stylesheet.css:
--------------------------------------------------------------------------------
1 | /* standard EDoc style sheet */
2 | body {
3 | font-family: Verdana, Arial, Helvetica, sans-serif;
4 | margin-left: .25in;
5 | margin-right: .2in;
6 | margin-top: 0.2in;
7 | margin-bottom: 0.2in;
8 | color: #000000;
9 | background-color: #ffffff;
10 | }
11 | h1,h2 {
12 | margin-left: -0.2in;
13 | }
14 | div.navbar {
15 | background-color: #add8e6;
16 | padding: 0.2em;
17 | }
18 | h2.indextitle {
19 | padding: 0.4em;
20 | background-color: #add8e6;
21 | }
22 | h3.function,h3.typedecl {
23 | background-color: #add8e6;
24 | padding-left: 1em;
25 | }
26 | div.spec {
27 | margin-left: 2em;
28 | background-color: #eeeeee;
29 | }
30 | a.module {
31 | text-decoration:none
32 | }
33 | a.module:hover {
34 | background-color: #eeeeee;
35 | }
36 | ul.definitions {
37 | list-style-type: none;
38 | }
39 | ul.index {
40 | list-style-type: none;
41 | background-color: #eeeeee;
42 | }
43 |
44 | /*
45 | * Minor style tweaks
46 | */
47 | ul {
48 | list-style-type: square;
49 | }
50 | table {
51 | border-collapse: collapse;
52 | }
53 | td {
54 | padding: 3
55 | }
56 |
--------------------------------------------------------------------------------
/doc/swim.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Module swim #
4 | * [Data Types](#types)
5 | * [Function Index](#index)
6 | * [Function Details](#functions)
7 |
8 |
9 |
10 | ## Data Types ##
11 |
12 |
13 |
14 |
15 | ### swim_opt() ###
16 |
17 |
18 |
19 | swim_opt() = {protocol_period, pos_integer()} | {ack_proxies, pos_integer()} | {ack_timeout, pos_integer()}
20 |
21 |
22 |
23 |
24 | ## Function Index ##
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 | ## Function Details ##
33 |
34 |
35 |
36 | ### child_spec/4 ###
37 |
38 | `child_spec(Name, LocalMember, Keys, Opts) -> any()`
39 |
40 |
41 |
42 | ### local_member/1 ###
43 |
44 | `local_member(Pid) -> any()`
45 |
46 |
47 |
48 | ### members/1 ###
49 |
50 | `members(Pid) -> any()`
51 |
52 |
53 |
54 | ### publish/2 ###
55 |
56 | `publish(Pid, Event) -> any()`
57 |
58 |
59 |
60 | ### rotate_keys/2 ###
61 |
62 | `rotate_keys(Pid, NewKey) -> any()`
63 |
64 |
65 |
66 | ### start_link/3 ###
67 |
68 |
69 | start_link(LocalMember::member(), Keys::[key()], Opts::[swim_opt() | swim_membership:swim_membership_opt()]) -> {ok, pid()}
70 |
71 |
72 |
73 |
74 |
75 | ### start_link/4 ###
76 |
77 |
78 | start_link(Name::atom(), LocalMember::member(), Keys::[key()], Opts::[swim_opt() | swim_membership:swim_membership_opt()]) -> {ok, pid()}
79 |
80 |
81 |
82 |
83 |
84 | ### stop/1 ###
85 |
86 | `stop(Pid) -> any()`
87 |
88 |
89 |
90 | ### subscribe/1 ###
91 |
92 | `subscribe(Pid) -> any()`
93 |
94 |
--------------------------------------------------------------------------------
/doc/swim_broadcasts.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Module swim_broadcasts #
4 | * [Description](#description)
5 | * [Function Index](#index)
6 | * [Function Details](#functions)
7 |
8 | This module is responsible for maintaining membership updates and user
9 | provided events along with their state as a part of the
10 | infection-style dissemination component of the SWIM protocol.
11 |
12 | Copyright (c) 2015
13 |
14 | __Version:__ Feb 18 2016 10:13:03
15 |
16 |
17 |
18 | ## Description ##
19 |
20 | ### Infection-Style Dissemination
21 | As an alternative to IP Multicast
22 | or a point-to-point messaging scheme the SWIM protocol
23 | disseminates membership updates by piggybacking on messages sent
24 | as a part of the failure detection protocol. Thus, the implementation
25 | does not generate any extra packets to send membership updates.
26 |
27 | Here, `swim_broadcasts` maintains the buffer of recent membership
28 | events along with a count for each event. The local count
29 | specifies the number of times the event has been piggybacked so
30 | far by this member and is used to choose which events to piggyback
31 | next. Each event is piggybacked at most `Retransmit * log(N +
32 | 1)` times, where `Retransmit` is a configurable parameter.
33 | If the size of events in the buffer is larger than the maximum number of
34 | events that can be piggybacked on a single PING or ACK, events that have
35 | been gossiped fewer times are preferred. This is needed as the
36 | protocol period is fixed and the rate of membership changes might
37 | temporarily overwhelm the speed of dissemination. Preferring
38 | "younger" events under such circumstances ensures that all
39 | membership changes infect at least a few members - when the
40 | membership change rate quiesces, older events will
41 | propagate through the rest of the gossip group. Membership events are always
42 | preferred over user-provided events.
43 |
44 |
45 | ## Function Index ##
46 |
47 |
48 | dequeue/2 | |
dequeue/3 | Dequeues a set of encoded events ready to be broadcast to other members
49 | in the group. |
handle_info/2 | |
max_transmissions/2 | Calculates the maximum number of times an event should be broadcast. |
membership/2 | Queues a membership event to be broadcast to other members in the group. |
user/2 | Queues a user event to be broadcast to other members in the group. |
50 |
51 |
52 |
53 |
54 | ## Function Details ##
55 |
56 |
57 |
58 | ### dequeue/2 ###
59 |
60 |
61 | dequeue(EventMgrPid::pid(), NumMembers::pos_integer()) -> binary()
62 |
63 |
64 |
65 |
66 |
67 | ### dequeue/3 ###
68 |
69 |
70 | dequeue(EventMgrPid::pid() | module(), NumMembers::pos_integer(), MaxSize::pos_integer()) -> binary()
71 |
72 |
73 |
74 | Dequeues a set of encoded events ready to be broadcast to other members
75 | in the group
76 |
77 | Events to be broadcast are determined by the number of the peers as well as
78 | the size limitation provided by `MaxSize`. Membership events always take
79 | precedence over user events. Events are broadcast up to a maximum number of
80 | times determined by [`max_transmissions/2`](#max_transmissions-2). If the number of events
81 | exceeds the maximum number of events allowable under `MaxSize`, events that have
82 | been broadcast fewer times are preferred. This is needed as the rate of
83 | incoming events, i.e. membership changes, might temporarily overwhelm
84 | the speed of dissemination.
85 | Preferring younger events ensures that all events
86 | infect at least a few members. Events that have exceeded
87 | their retransmit limit are removed from the broadcasts. Events that are
88 | returned have their number of retransmissions incremented by 1.
89 |
90 |
91 |
92 | ### handle_info/2 ###
93 |
94 | `handle_info(Info, State) -> any()`
95 |
96 |
97 |
98 | ### max_transmissions/2 ###
99 |
100 |
101 | max_transmissions(NumMembers::pos_integer(), RetransmitFactor::pos_integer()) -> pos_integer()
102 |
103 |
104 |
105 | Calculates the maximum number of times an event should be broadcast.
106 |
107 |
108 |
109 | ### membership/2 ###
110 |
111 |
112 | membership(EventMgrPid::pid(), Event::{member_status(), member(), incarnation()}) -> ok
113 |
114 |
115 |
116 | Queues a membership event to be broadcast to other members in the group
117 |
118 |
119 |
120 | ### user/2 ###
121 |
122 |
123 | user(EventMgrPid::pid(), Event::term()) -> ok
124 |
125 |
126 |
127 | Queues a user event to be broadcast to other members in the group
128 |
129 |
--------------------------------------------------------------------------------
/doc/swim_membership.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Module swim_membership #
4 | * [Description](#description)
5 | * [Data Types](#types)
6 | * [Function Index](#index)
7 | * [Function Details](#functions)
8 |
9 | This module is responsible for maintaining the list and status of non
10 | faulty members in a gossip group through the use of the Suspicion Mechanism
11 | described in the SWIM Paper.
12 |
13 | Copyright (c) 2015
14 |
15 | __Version:__ Feb 18 2016 10:13:03
16 |
17 |
18 |
19 | ## Description ##
20 | A `swim_membership` process becomes aware of
21 | membership changes through exported functions defined for a specific member
22 | status, [`alive/3`](#alive-3), [`suspect/3`](#suspect-3), [`faulty/3`](#faulty-3), as determined
23 | by the Failure Detection mechanism of SWIM implemented in [`swim`](swim.md). Member
24 | state includes the locally known status of a member as well as a logical clock
25 | for the member's status known in the SWIM paper as the incarnation.
26 | When the status of a member changes events are sent to [`swim_broadcasts`](swim_broadcasts.md)
27 | to be broadcast to the rest of the members in the gossip group.
28 |
29 |
30 |
31 | ## Data Types ##
32 |
33 |
34 |
35 |
36 | ### swim_membership_opt() ###
37 |
38 |
39 |
40 | swim_membership_opt() = {seeds, [member()]} | {suspicion_factor, pos_integer()} | {protocol_period, pos_integer()}
41 |
42 |
43 |
44 |
45 | ## Function Index ##
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 | ## Function Details ##
54 |
55 |
56 |
57 | ### alive/3 ###
58 |
59 |
60 | alive(Pid::pid(), Member::member(), Incarnation::incarnation()) -> [{member_status(), member(), incarnation()}]
61 |
62 |
63 |
64 | Set the member status to alive
65 |
66 | If the member isn't known it's added to the membership and an event is
67 | broadcast to the group. If the member is known and the incarnation is
68 | greater than the current incarnation of the member, we update the incarnation
69 | of the member and broadcast an event to the group. Otherwise, we do nothing.
70 |
71 |
72 |
73 | ### faulty/3 ###
74 |
75 |
76 | faulty(Pid::pid(), Member::member(), Incarnation::incarnation()) -> [{member_status(), member(), incarnation()}]
77 |
78 |
79 |
80 | Remove the member from the group
81 |
82 | If the member isn't already known we do nothing. If the member is known
83 | we remove the member and broadcast the change if the provided incarnation is
84 | greater than the current incarnation of the member.
85 |
86 |
87 |
88 | ### local_member/1 ###
89 |
90 |
91 | local_member(Pid::pid()) -> member()
92 |
93 |
94 |
95 | The identifier for the local member
96 |
97 |
98 |
99 | ### members/1 ###
100 |
101 |
102 | members(Pid::pid()) -> [{member_status(), member(), incarnation()}]
103 |
104 |
105 |
106 | A list of known members and their status
107 |
108 |
109 |
110 | ### num_members/1 ###
111 |
112 |
113 | num_members(Pid::pid()) -> pos_integer()
114 |
115 |
116 |
117 | The number of known members in the gossip group, including the local member
118 |
119 |
120 |
121 | ### opts/1 ###
122 |
123 |
124 | opts(Opts::list()) -> [swim_membership_opt()]
125 |
126 |
127 |
128 |
129 |
130 | ### set_status/3 ###
131 |
132 |
133 | set_status(Pid::pid(), Member::member(), Status::member_status()) -> ok
134 |
135 |
136 |
137 | Forcibly set the status of a member
138 |
139 | In certain circumstances we want to be able to set the status of a member
140 | without regard to the rules of the suspicion mechanism which uses a member's
141 | current status and incarnation.
142 |
143 |
144 |
145 | ### start_link/3 ###
146 |
147 |
148 | start_link(LocalMember::member(), EventMgrPid::pid(), Opts::[swim_membership_opt()]) -> {ok, pid()}
149 |
150 |
151 |
152 |
153 |
154 | ### suspect/3 ###
155 |
156 |
157 | suspect(Pid::pid(), Member::member(), Incarnation::incarnation()) -> [{member_status(), member(), incarnation()}]
158 |
159 |
160 |
161 | Set the member status to suspect
162 |
163 | If the member isn't already known we do nothing. If the member is known
164 | we update the status and broadcast the change under the following conditions. If
165 | the current status of the member is alive and the incarnation is greater than
166 | or equal to the known incarnation of the member, we update the member's status
167 | to suspect and broadcast the change. If the current status of the member is
168 | suspect and the incarnation is greater than the known incarnation, we update
169 | the member's status to suspect, set the known incarnation to the provided
170 | incarnation and broadcast the change.
171 | If the suspected member is the local member we refute by incrementing our own
172 | incarnation and broadcasting the change to the group.
173 |
174 |
--------------------------------------------------------------------------------
/doc/swim_messages.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Module swim_messages #
4 | * [Description](#description)
5 | * [Function Index](#index)
6 | * [Function Details](#functions)
7 |
8 | This module is responsible for encoding and decoding SWIM protocol
9 | messages as well as encrypting and decrypting the message payloads.
10 |
11 | Copyright (c) 2015
12 |
13 | __Version:__ Feb 18 2016 10:13:03
14 |
15 |
16 |
17 | ## Description ##
18 |
19 | SWIM protocol message encodings can be found in the documentation
20 | corresponding to the various encoding functions defined in this module.
21 | [`encode_ack/3`](#encode_ack-3), [`encode_ping/2`](#encode_ping-2), [`encode_ping_req/2`](#encode_ping_req-2),
22 | [`encode_leave/1`](#encode_leave-1).
23 | All SWIM protocol messages are prefixed with a single octet reflecting
24 | the protocol version of the message. The overall format of SWIM messages is:
25 |
26 |
27 |
28 |
29 | 1 |
30 | 1 |
31 | N |
32 |
33 |
34 | Version |
35 | Tag |
36 | Data |
37 |
38 |
39 |
40 |
41 | - __*Version*__ : is the protocol version the message is encoded for
42 | - __*Tag*__ : indicates what type of SWIM message Data represents; ACK, PING,
43 | PING-REQ, or LEAVE
44 | - __*Data*__ : The SWIM messages payload
45 |
46 | All SWIM messages are encrypted over the wire using AES-256-GCM. See
47 | [`encrypt/3`](#encrypt-3) for more information. The encryption header is encoded as
48 | follows:
49 |
50 |
51 |
52 |
53 | 16 |
54 | 16 |
55 | N |
56 |
57 |
58 | IV |
59 | CipherTag |
60 | CipherText |
61 |
62 |
63 |
64 |
65 |
66 | ## Function Index ##
67 |
68 |
69 | decode/1 | Decodes the provided message from a binary to an Erlang Term. |
decode_event/2 | |
decode_events/1 | |
decrypt/3 | Verifies the authenticity of the payload and decrypts the ciphertext
70 | generated by encrypt/3 . |
encode_ack/3 | Encodes an ACK message, piggybacking membership and user events. |
encode_event/1 | Encode either a membership event or a user event. |
encode_events/1 | Encodes a list of swim events. |
encode_leave/1 | Encodes a LEAVE message. |
encode_member/1 | Encodes a Member as the IP address and port number combination. |
encode_ping/2 | Encodes a PING message, piggybacking membership and user events. |
encode_ping_req/2 | Encodes a PING-REQ message. |
encrypt/3 | Encrypts the provided plain text using the Advanced Encryption Standard
71 | (AES) in Galois/Counter (GCM) using the provided 32-octet Key,
72 | Associated Authenticated Data (AAD), and a randomly generated
73 | Initialization Vector (IV). |
event_size_limit/0 | Event size limit determines the maximum size (in octets) available to
74 | to piggyback membership and user events on an ACK or PING message. |
75 |
76 |
77 |
78 |
79 | ## Function Details ##
80 |
81 |
82 |
83 | ### decode/1 ###
84 |
85 |
86 | decode(Message::binary()) -> swim_message()
87 |
88 |
89 |
90 | Decodes the provided message from a binary to an Erlang Term.
91 |
92 | All messages are prefixed with a single octet to indicate the version
93 | of the protocol. The return value is an Erlang term of the message. If the
94 | version is not supported or the message is malformed, an exception is thrown.
95 |
96 |
97 |
98 | ### decode_event/2 ###
99 |
100 |
101 | decode_event(X1::membership | user, X2::binary()) -> {swim_event(), binary()}
102 |
103 |
104 |
105 |
106 |
107 | ### decode_events/1 ###
108 |
109 |
110 | decode_events(Events::binary()) -> [swim_event()]
111 |
112 |
113 |
114 |
115 |
116 | ### decrypt/3 ###
117 |
118 |
119 | decrypt(Key::<<_:256>>, AAD::binary(), Payload::binary()) -> binary() | {error, failed_verification}
120 |
121 |
122 |
123 | Verifies the authenticity of the payload and decrypts the ciphertext
124 | generated by [`encrypt/3`](#encrypt-3). Note the keys used as input to [`encrypt/3`](#encrypt-3)
125 | must be identical to those provided here. Decrypt is not responsible for
126 | decoding the underlying Swim protocol message -- see [`decode/1`](#decode-1).
127 |
128 |
129 |
130 | ### encode_ack/3 ###
131 |
132 |
133 | encode_ack(Seq::sequence(), Target::member(), Events::[swim_event()] | binary()) -> binary() | no_return()
134 |
135 |
136 |
137 | Encodes an ACK message, piggybacking membership and user events.
138 |
139 | An ACK message has the following format:
140 |
141 |
142 |
143 |
144 | 1 |
145 | 4 |
146 | 6 |
147 | N |
148 |
149 |
150 | 2 |
151 | Sequence |
152 | Member |
153 | Events |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 | Sequence
162 |
163 |
164 |
165 |
166 | is the same Sequence received in the corresponding PING message
167 |
168 |
169 |
170 |
171 | Member
172 |
173 |
174 |
175 |
176 | is the terminal Member for the corresponding PING. In the case of
177 | a PING-REQ, the Member is not the sender of this ACK.
178 | See encode_member/1
for Member encoding.
179 |
180 |
181 |
182 |
183 | Events
184 |
185 |
186 |
187 |
188 | is a list of membership and user events piggybacked as a part of the
189 | dissemination protocol.
190 | See encode_events/1
for Event encoding.
191 |
192 |
193 |
194 |
195 |
196 | ### encode_event/1 ###
197 |
198 |
199 | encode_event(Event::swim_event() | binary()) -> binary()
200 |
201 |
202 |
203 | Encode either a membership event or a user event.
204 |
205 | A membership event is encoded as follows:
206 |
207 |
208 |
209 |
210 | 1 |
211 | 1 |
212 | 6 |
213 | 4 |
214 |
215 |
216 | 50 |
217 | Status |
218 | Member |
219 | Incarnation |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 | Status
228 |
229 |
230 |
231 |
232 | is observed status of the Member being broadcast to the group
233 |
234 |
235 |
236 |
237 | Member
238 |
239 |
240 |
241 |
242 | is the subject of this membership event
243 |
244 |
245 |
246 |
247 | Incarnation
248 |
249 |
250 |
251 |
252 | is the incarnation of the subject Member known by the sender of this
253 | event. See swim_membership
for more information on Incarnations.
254 |
255 |
256 |
257 | A user event is encoded as follows:
258 |
259 |
260 |
261 |
262 | 1 |
263 | 2 |
264 | Size |
265 |
266 |
267 | 51 |
268 | Size |
269 | Erlang Term |
270 |
271 |
272 |
273 |
274 |
275 |
276 | ### encode_events/1 ###
277 |
278 |
279 | encode_events(Events::[swim_event() | binary()]) -> binary()
280 |
281 |
282 |
283 | Encodes a list of swim events. See [`encode_event/1`](#encode_event-1).
284 |
285 |
286 |
287 | ### encode_leave/1 ###
288 |
289 |
290 | encode_leave(Seq::sequence()) -> binary()
291 |
292 |
293 |
294 | Encodes a LEAVE message.
295 |
296 |
297 |
298 |
299 | 1 |
300 | 4 |
301 |
302 |
303 | 4 |
304 | Sequence |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 | Sequence
313 |
314 |
315 |
316 |
317 | is the iteration of the failure detection protocol the leave message
318 | was sent during
319 |
320 |
321 |
322 |
323 |
324 | ### encode_member/1 ###
325 |
326 |
327 | encode_member(Member::{inet:ip_address(), inet:port_number()}) -> <<_:48>> | <<_:96>>
328 |
329 |
330 |
331 | Encodes a Member as the IP address and port number combination.
332 |
333 |
334 |
335 |
336 | 1 |
337 | Size |
338 | 2 |
339 |
340 |
341 | Size |
342 | IP Address |
343 | Port Number |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 | IP Address
352 |
353 |
354 |
355 |
356 | is the IPv4 or IPv6 address the Member can be reached
357 |
358 |
359 |
360 |
361 | Port Number
362 |
363 |
364 |
365 |
366 | is the associated Port Number the Member is listening on
367 |
368 |
369 |
370 |
371 |
372 | ### encode_ping/2 ###
373 |
374 |
375 | encode_ping(Seq::sequence(), Events::[swim_event()] | binary()) -> binary() | no_return()
376 |
377 |
378 |
379 | Encodes a PING message, piggybacking membership and user events.
380 |
381 | A PING message has the following format:
382 |
383 |
384 |
385 |
386 | 1 |
387 | 4 |
388 | N |
389 |
390 |
391 | 1 |
392 | Sequence |
393 | Events |
394 |
395 |
396 |
397 |
398 |
399 |
400 | ### encode_ping_req/2 ###
401 |
402 |
403 | encode_ping_req(Seq::sequence(), Target::member()) -> binary()
404 |
405 |
406 |
407 | Encodes a PING-REQ message.
408 |
409 | A PING-REQ message has the following format:
410 |
411 |
412 |
413 |
414 | 1 |
415 | 4 |
416 | 6 |
417 |
418 |
419 | 3 |
420 | Sequence |
421 | Member |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 | Sequence
430 |
431 |
432 |
433 |
434 | is the iteration of the failure detection protocol the PING-REQ was
435 | sent during
436 |
437 |
438 |
439 |
440 | Member
441 |
442 |
443 |
444 |
445 | the terminal of the PING-REQ. The receiver of the PING-REQ is the proxy
446 | for the PING. See encode_member/1
for Member encoding.
447 |
448 |
449 |
450 |
451 |
452 | ### encrypt/3 ###
453 |
454 |
455 | encrypt(Key::<<_:256>>, AAD::binary(), PlainText::binary()) -> binary()
456 |
457 |
458 |
459 | Encrypts the provided plain text using the Advanced Encryption Standard
460 | (AES) in Galois/Counter (GCM) using the provided 32-octet Key,
461 | Associated Authenticated Data (AAD), and a randomly generated
462 | Initialization Vector (IV). The resulting payload includes the 16-octet IV,
463 | the 16-octet CipherTag and the block encrypted cipher text.
464 |
465 |
466 |
467 | ### event_size_limit/0 ###
468 |
469 |
470 | event_size_limit() -> '?MAX_EVENT_SIZE'
471 |
472 |
473 |
474 | Event size limit determines the maximum size (in octets) available
475 | to piggyback membership and user events on an ACK or PING message.
476 |
477 | The max message size we use is the minimum reassembly buffer size defined for
478 | IPv4 to avoid IP fragmentation -- 576 octets.
479 | UDP has an overhead of a 20 octet IP header and an 8 octet
480 | UDP header. The max swim message size is 40 octets, with 16 octets for the
481 | nonce, and 16 octets for the CipherTag. That leaves 476 octets for the events.
482 | A membership event is 41 octets which equates to 11 membership events per
483 | ACK/PING message. User messages have an over head of 9 octets, leaving 467
484 | octets for the user message payload.
485 |
486 |
--------------------------------------------------------------------------------
/doc/swim_subscriptions.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Module swim_subscriptions #
4 | * [Function Index](#index)
5 | * [Function Details](#functions)
6 |
7 |
8 |
9 | ## Function Index ##
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | ## Function Details ##
18 |
19 |
20 |
21 | ### code_change/3 ###
22 |
23 | `code_change(OldVsn, State, Extra) -> any()`
24 |
25 |
26 |
27 | ### handle_call/2 ###
28 |
29 | `handle_call(Msg, State) -> any()`
30 |
31 |
32 |
33 | ### handle_event/2 ###
34 |
35 | `handle_event(Event, State) -> any()`
36 |
37 |
38 |
39 | ### handle_info/2 ###
40 |
41 | `handle_info(Info, State) -> any()`
42 |
43 |
44 |
45 | ### init/1 ###
46 |
47 | `init(X1) -> any()`
48 |
49 |
50 |
51 | ### subscribe/3 ###
52 |
53 | `subscribe(EventMgrPid, EventCategory, Pid) -> any()`
54 |
55 |
56 |
57 | ### terminate/2 ###
58 |
59 | `terminate(Reason, State) -> any()`
60 |
61 |
--------------------------------------------------------------------------------
/doc/swim_transport.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Module swim_transport #
4 | * [Function Index](#index)
5 | * [Function Details](#functions)
6 |
7 |
8 |
9 | ## Function Index ##
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | ## Function Details ##
18 |
19 |
20 |
21 | ### close/1 ###
22 |
23 | `close(Pid) -> any()`
24 |
25 |
26 |
27 | ### code_change/3 ###
28 |
29 | `code_change(OldVsn, State, Extra) -> any()`
30 |
31 |
32 |
33 | ### handle_call/3 ###
34 |
35 | `handle_call(Msg, From, State) -> any()`
36 |
37 |
38 |
39 | ### handle_cast/2 ###
40 |
41 | `handle_cast(Msg, State) -> any()`
42 |
43 |
44 |
45 | ### handle_info/2 ###
46 |
47 | `handle_info(Info, State) -> any()`
48 |
49 |
50 |
51 | ### init/1 ###
52 |
53 | `init(X1) -> any()`
54 |
55 |
56 |
57 | ### rotate_keys/2 ###
58 |
59 | `rotate_keys(Pid, Key) -> any()`
60 |
61 |
62 |
63 | ### send/4 ###
64 |
65 | `send(Pid, DestIp, DestPort, Data) -> any()`
66 |
67 |
68 |
69 | ### start_link/3 ###
70 |
71 | `start_link(ListenIp, ListenPort, Keys) -> any()`
72 |
73 |
74 |
75 | ### terminate/2 ###
76 |
77 | `terminate(Reason, State) -> any()`
78 |
79 |
--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {erl_opts, [
2 | debug_info,
3 | warn_unused_vars,
4 | warn_shadow_vars,
5 | warn_unused_export,
6 | warn_unused_function,
7 | warn_unused_record,
8 | warn_deprecated_function,
9 | warn_deprecated_type,
10 | warn_obsolete_guard,
11 | strict_validation,
12 | warn_export_vars,
13 | warn_exported_vars,
14 | warn_untyped_record,
15 | fail_on_warning
16 | ]}.
17 | {deps, []}.
18 | {cover_enabled, true}.
19 | {cover_opts, [verbose]}.
20 | {edoc_opts, [
21 | {doclet, edown_doclet},
22 | {app_default, "http://www.erlang.org/doc/man"},
23 | {doc_path, []},
24 | {top_level_readme, {"./README.md", "https://github.com/ctbarbour/swim", "master"}}
25 | ]}.
26 | {xref_warnings, true}.
27 | {xref_checks, [
28 | undefined_function_calls,
29 | undefined_functions,
30 | locals_not_used,
31 | deprecated_function_calls,
32 | deprecated_functions
33 | ]}.
34 | {dialyzer, [
35 | {warnings, [
36 | race_conditions,
37 | error_handling
38 | ]}
39 | ]}.
40 | {plugins, [rebar3_proper]}.
41 | {profiles, [
42 | {docs, [{deps, [edown]}]},
43 | {shell, [{deps, [recon, sync]}]},
44 | {test, [{deps, [proper, meck]}]}
45 | ]}.
46 |
--------------------------------------------------------------------------------
/rebar.lock:
--------------------------------------------------------------------------------
1 | [].
2 |
--------------------------------------------------------------------------------
/src/swim.app.src:
--------------------------------------------------------------------------------
1 | {application, swim,
2 | [{description, "Scalable Weakly Consistent Infection-style Process Group Membership Protocol"},
3 | {vsn, semver},
4 | {registered, []},
5 | {mod, {swim_app, []}},
6 | {applications,
7 | [kernel,
8 | stdlib,
9 | crypto
10 | ]},
11 | {env,[]},
12 | {modules, []},
13 | {licenses, ["Apache 2.0"]}
14 | ]}.
15 |
--------------------------------------------------------------------------------
/src/swim.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017. All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | -module(swim).
19 |
20 | -export([join/1]).
21 | -export([members/0]).
22 | -export([myself/0]).
23 | -export([publish/1]).
24 | -export([subscribe/1]).
25 | -export([unsubscribe/1]).
26 | -export([setup/0]).
27 |
28 | -type member() :: {inet:ip_address(), inet:port_number()}.
29 | -type incarnation() :: non_neg_integer().
30 | -type user_event() :: binary().
31 | -type membership_event() :: alive_event() | suspect_event() | faulty_event().
32 | -type suspect_event() :: {suspect, incarnation(), member(), member()}.
33 | -type alive_event() :: {alive, incarnation(), member()}.
34 | -type faulty_event() :: {faulty, incarnation(), member(), member()}.
35 | -type swim_event() :: {user, user_event()} | {membership, membership_event()}.
36 |
37 | -export_type([swim_event/0]).
38 | -export_type([member/0]).
39 | -export_type([incarnation/0]).
40 | -export_type([user_event/0]).
41 | -export_type([membership_event/0]).
42 |
43 | join(Seed) -> % Join through Seed; forwards to swim_pushpull:join/2 with an empty map as the second argument.
44 | swim_pushpull:join(Seed, #{}).
45 |
46 | members() -> % Project each {Member, _, _} triple from swim_state:members/0 down to the member itself.
47 |     [Member || {Member, _, _} <- swim_state:members()].
48 |
49 | myself() -> % Returns whatever swim_state:local_member/0 reports.
50 | swim_state:local_member().
51 |
52 | publish(Msg) when is_binary(Msg) -> % Only binaries are accepted; any other term fails with function_clause.
53 | swim_state:publish(Msg).
54 |
55 | subscribe(metrics) -> % The metrics category is handled by swim_metrics; the calling process is the subscriber.
56 | swim_metrics:subscribe(self());
57 | subscribe(EventCategory) -> % Every other category is handed to swim_subscriptions, again for the calling process.
58 | swim_subscriptions:subscribe(EventCategory, self()).
59 |
60 | unsubscribe(metrics) -> % Mirror of subscribe(metrics): removes the calling process via swim_metrics.
61 | swim_metrics:unsubscribe(self());
62 | unsubscribe(EventCategory) -> % Mirror of subscribe/1 for every other category, via swim_subscriptions.
63 | swim_subscriptions:unsubscribe(EventCategory, self()).
64 |
65 | setup() -> % Configures and starts the swim application on this node and on every connected node (looks like a local development helper -- confirm).
66 | Key = base64:encode(crypto:strong_rand_bytes(32)), % one random 32-byte key, base64-encoded, shared by all nodes
67 | BasePort = 5000,
68 | Ms = lists:zip( % pair {{127,0,0,1}, Port} with each node; ports count up from BasePort, one per node
69 | [{{127,0,0,1}, P} || P <- lists:seq(BasePort, length(nodes()) + BasePort)],
70 | [node() | nodes()]),
71 | [rpc:call(Node, application, set_env, [swim, port, Port]) || {{_, Port}, Node} <- Ms], % per-node port setting
72 | [rpc:call(Node, application, set_env, [swim, key, Key]) || Node <- [node() | nodes()]], % same key on every node
73 | [rpc:call(Node, application, start, [swim]) || Node <- [node() | nodes()]]. % result: the list of rpc:call/4 return values
74 |
--------------------------------------------------------------------------------
/src/swim_app.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | -module(swim_app).
22 | -behavior(application).
23 |
24 | -export([start/2]).
25 | -export([stop/1]).
26 |
27 | start(_Type, _Args) -> % application callback: start type and arguments are ignored; the top supervisor is started.
28 | swim_sup:start_link().
29 |
30 | stop(_State) -> % application callback: nothing to clean up here.
31 | ok.
32 |
--------------------------------------------------------------------------------
/src/swim_awareness.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | -module(swim_awareness).
22 |
23 | -export([new/1]).
24 | -export([success/1]).
25 | -export([failure/1]).
26 | -export([failure/2]).
27 | -export([scale/2]).
28 |
29 | -opaque awareness() :: {pos_integer(), non_neg_integer()}.
30 | -export_type([awareness/0]).
31 |
32 | -spec new(Max) -> Awareness when
33 | Max :: pos_integer(),
34 | Awareness :: awareness().
35 |
36 | new(Max) -> % A fresh awareness is the pair {Max, 0}: upper bound plus a current value starting at zero.
37 | {Max, 0}.
38 |
39 | -spec success(Awareness0) -> Awareness when
40 | Awareness0 :: awareness(),
41 | Awareness :: awareness().
42 |
43 | success({_Max, 0} = Floor) ->       % already at the lower bound: returned unchanged
44 |     Floor;
45 | success({Max, Score}) ->            % otherwise step the current value down by one
46 |     {Max, Score - 1}.
47 |
48 | -spec failure(Awareness0) -> Awareness when
49 | Awareness0 :: awareness(),
50 | Awareness :: awareness().
51 |
52 | failure({Max, Max} = Ceiling) ->    % value already equals the upper bound: returned unchanged
53 |     Ceiling;
54 | failure({Max, Score}) ->            % otherwise step the current value up by one
55 |     {Max, Score + 1}.
56 |
57 | -spec failure(N, Awareness0) -> Awareness when
58 | N :: pos_integer(),
59 | Awareness0 :: awareness(),
60 | Awareness :: awareness().
61 |
62 | failure(N, Awareness) ->            % apply failure/1 N times, threading the awareness through a fold
63 |     lists:foldl(fun(_Round, Current) -> failure(Current) end, Awareness, lists:seq(1, N)).
64 |
65 | -spec scale(Timeout, Awareness) -> Value when
66 | Timeout :: non_neg_integer(),
67 | Awareness :: awareness(),
68 | Value :: non_neg_integer().
69 |
70 | scale(Timeout, {_Max, Score}) ->    % the upper bound plays no part; a value of 0 leaves Timeout as is
71 |     (Score + 1) * Timeout.
72 |
--------------------------------------------------------------------------------
/src/swim_broadcasts.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | %%% @doc This module is responsible for maintaining membership updates and user
22 | %%% provided events along with their state as a part of the
23 | %%% infection-style dissemination component of the SWIM protocol.
24 | %%%
25 | %%% ### Infection-Style Dissemination
26 | %%% As an alternative to IP Multicast
27 | %%% or a point-to-point messaging scheme the SWIM protocol
28 | %%% disseminates membership updates by piggybacking on messages sent
29 | %%% as a part of the failure detection protocol. Thus, implementation
30 | %%% does not generate any extra packets to send membership updates.
31 | %%%
32 | %%% Here, `swim_broadcasts' maintains a buffer of recent membership
33 | %%% events along with a count for each event. The local count
34 | %%% specifies the number of times the event has been piggybacked so
35 | %%% far by this member and is used to choose which events to piggyback
36 | %%% next. Each event is piggybacked at most `Retransmit * log(N +
37 | %%% 1)' times, where `Retransmit' is a configurable parameter.
38 | %%% If the size of events in the buffer is larger than the maximum number of
39 | %%% events that can be piggybacked on a single PING or ACK, events that have
40 | %%% been gossiped fewer times are preferred. This is needed as the
41 | %%% protocol period is fixed and the rate of membership changes might
42 | %%% temporarily overwhelm the speed of dissemination. Preferring
43 | %%% "younger" events under such circumstances ensures that all
44 | %%% membership changes infect at least a few members - when the
45 | %%% membership change rate quiesces, older events will
46 | %%% propagate through the rest of the gossip group. Membership events are always
47 | %%% preferred over user-provided events.
48 | %%%
49 | %%% @TODO: Consider a more efficient implementation. We're sorting lists a lot.
50 | %%% We could potentially use a min-heap or priority queue but need to handle the requirement for
51 | %%% invalidating events about the same member. Consider
52 | %%% https://github.com/okeuday/pqueue/blob/master/src/pqueue4.erl
53 | %%% @end
54 | -module(swim_broadcasts).
55 |
56 | -export([new/1]).
57 | -export([new/2]).
58 | -export([insert/2]).
59 | -export([prune/2]).
60 | -export([take/1]).
61 | -export([take/2]).
62 | -export([retransmit_limit/2]).
63 |
64 | -record(broadcast, {
65 |     members = [] :: [{non_neg_integer(), swim:membership_event()}],   % {times taken so far, event}
66 |     users = [] :: [{non_neg_integer(), swim:user_event()}],           % {times taken so far, event}
67 |     retransmits :: pos_integer(),                                     % factor used by retransmit_limit/2
68 |     limit_fun :: fun((swim:swim_event()) -> pos_integer()),           % cost of one tagged event; was the undefined type swim:member_event()
69 |     limit :: pos_integer()                                            % total cost budget for a single take
70 | }).
71 |
72 | -opaque broadcast() :: #broadcast{}.
73 | -export_type([broadcast/0]).
74 |
75 | -spec new(Retransmits) -> Broadcast when
76 | Retransmits :: pos_integer(),
77 | Broadcast :: broadcast().
78 |
79 | new(Retransmits) -> % Same as new/2 with default_limit/0 as the size budget.
80 | new(Retransmits, default_limit()).
81 |
82 | -spec new(Retransmits, Limit) -> Broadcast when
83 | Retransmits :: pos_integer(),
84 | Limit :: pos_integer(),
85 | Broadcast :: broadcast().
86 |
87 | new(Retransmits, Limit) ->          % empty queues; event cost is measured by default_limit_fun/0
88 |     #broadcast{retransmits = Retransmits, limit = Limit,
89 |                limit_fun = default_limit_fun()}.
90 |
91 | default_limit() -> % The default budget is whatever swim_messages:event_size_limit/0 returns.
92 | swim_messages:event_size_limit().
93 |
94 | default_limit_fun() -> % The cost of an event is the byte size of its encoding by swim_messages:encode_event/1.
95 | fun(E) -> iolist_size(swim_messages:encode_event(E)) end.
96 |
97 | -spec retransmit_limit(NumMembers, Broadcast) -> Limit when
98 | NumMembers :: pos_integer(),
99 | Broadcast :: broadcast(),
100 | Limit :: pos_integer().
101 |
102 | retransmit_limit(NumMembers, #broadcast{retransmits = Factor}) -> % rounded natural log of (NumMembers + 1), plus the configured factor
103 | round(math:log(NumMembers + 1)) + Factor. % NOTE(review): the module doc describes `Retransmit * log(N + 1)'; this adds instead of multiplying -- confirm which is intended
104 |
105 | -spec take(Broadcasts0) -> {Events, Broadcasts} when
106 | Broadcasts0 :: broadcast(),
107 | Events :: [swim:membership_event() | swim:user_event()],
108 | Broadcasts :: broadcast().
109 |
110 | take(#broadcast{limit = Limit, limit_fun = Fun, members = Members, users = Users} = Broadcast) -> % Take as many queued events as fit in Limit.
111 | {B, M, U} = take(Limit, Fun, Members, Users, {[], [], []}), % B: tagged events taken; M/U: the re-sorted queues with bumped counts
112 | {B, Broadcast#broadcast{members = M, users = U}}.
113 |
114 | take(0, _Fun, Members, Users, {B, M, U}) ->                 % budget used up: merge untaken entries with the processed ones
115 |     {B, lists:sort(Members ++ M), lists:sort(Users ++ U)};
116 | take(_Limit, _Fun, [], [], {B, M, U}) ->                    % both queues drained
117 |     {B, lists:sort(M), lists:sort(U)};
118 | take(Limit, Fun, [{T, E} | Members], Users, {B, M, U}) ->   % membership events are consumed before any user event
119 |     case Limit - Fun({membership, E}) of
120 |         L when L >= 0 ->
121 |             take(L, Fun, Members, Users, {[{membership, E} | B], [{T + 1, E} | M], U});
122 |         _ ->
123 |             take(0, Fun, Members, Users, {B, [{T, E} | M], U})  % fix: keep M; [{T, E} | Members] lost taken entries and duplicated the tail
124 |     end;
125 | take(Limit, Fun, [], [{T, E} | Users], {B, M, U}) ->        % user events only once the membership queue is empty
126 |     case Limit - Fun({user, E}) of
127 |         L when L >= 0 ->
128 |             take(L, Fun, [], Users, {[{user, E} | B], M, [{T + 1, E} | U]});
129 |         _ ->
130 |             take(0, Fun, [], Users, {B, M, [{T, E} | U]})   % does not fit: put it back with its count unchanged and stop
131 |     end.
132 |
133 | -spec take(Member, Broadcasts0) -> {Events, Broadcasts} when
134 | Member :: swim:member(),
135 | Broadcasts0 :: broadcast(),
136 | Events :: [swim:membership_event() | swim:user_event()],
137 | Broadcasts :: broadcast().
138 |
139 | take(Target, Broadcasts) -> % Like take/1, except that a queued suspect event about Target is taken first.
140 | #broadcast{limit = Limit, limit_fun = Fun, users = Users} = Broadcasts,
141 | Partition = fun({_, {suspect, _, M, _}}) -> M =:= Target; (_) -> false end, % true only for suspect events about Target
142 | {Maybe, Members} = lists:partition(Partition, Broadcasts#broadcast.members),
143 | {B, M, U} =
144 | case Maybe of
145 | [] ->
146 | take(Limit, Fun, Members, Users, {[], [], []});
147 | [{T, About}] -> % only a single match is handled; two or more would raise case_clause
148 | Acc = {[{membership, About}], [{T + 1, About}], []}, % pre-seed the result and bump that event's count
149 | take(Limit - Fun({membership, About}), Fun, Members, Users, Acc) % its cost is charged against the budget up front
150 | end,
151 | {B, Broadcasts#broadcast{members = M, users = U}}.
152 |
153 | -spec prune(Retransmits, Broadcasts0) -> Broadcasts when
154 | Retransmits :: non_neg_integer(),
155 | Broadcasts0 :: broadcast(),
156 | Broadcasts :: broadcast().
157 |
158 | prune(Retransmits, #broadcast{members = Members0, users = Users0} = Broadcast) ->
159 |     Keep = fun({Count, _Event}) -> Count < Retransmits end,     % entries taken Retransmits times or more are dropped
160 |     Broadcast#broadcast{
161 |         members = lists:filter(Keep, Members0),
162 |         users = lists:filter(Keep, Users0)}.
163 |
164 | %% @doc Insert an Event or list of Events to the Broadcast queue
165 | %%
166 | %% Upon inserting a Membership Event we invalidate any existing event about the same target member
167 | %% to prevent the broadcast of stale information.
168 | %% @end
169 | -spec insert(Events, Broadcasts0) -> Broadcasts when
170 | Events :: swim:swim_event() | [swim:swim_event()],
171 | Broadcasts0 :: broadcast(),
172 | Broadcasts :: broadcast().
173 |
174 | insert(Events, Broadcast) when is_list(Events) -> % a list is inserted element by element, left to right
175 | lists:foldl(fun insert/2, Broadcast, Events);
176 | insert({membership, Event}, #broadcast{members = MembershipEvents} = Broadcast) -> % replaces any queued event about the same member
177 | Broadcast#broadcast{members = invalidate(Event, MembershipEvents)};
178 | insert({user, Event}, #broadcast{users = UserEvents} = Broadcast) -> % user events start with a count of 0 and are never deduplicated
179 | Broadcast#broadcast{users = lists:sort([{0, Event} | UserEvents])}.
180 |
181 | -spec invalidate(Event, Events0) -> Events when
182 | Event :: swim:membership_event(),
183 | Events0 :: [{non_neg_integer(), swim:membership_event()}],
184 | Events :: [{non_neg_integer(), swim:membership_event()}].
185 |
186 | invalidate({_, _, Member} = Event, Events) -> % 3-tuple events: the member is the third element
187 | invalidate(Member, Event, Events);
188 | invalidate({_, _, Member, _} = Event, Events) -> % 4-tuple events: the member is still the third element
189 | invalidate(Member, Event, Events).
190 |
191 | -spec invalidate(Member, Event, Events0) -> Events when
192 | Member :: swim:member(),
193 | Event :: swim:membership_event(),
194 | Events0 :: [{non_neg_integer(), swim:membership_event()}],
195 | Events :: [{non_neg_integer(), swim:membership_event()}].
196 |
197 | invalidate(Member, Event, Events) -> % Drop every queued event about Member, then queue Event with a count of 0.
198 | Filter = fun({_, {_, _, M}}) -> M =/= Member;
199 | ({_, {_, _, M, _}}) -> M =/= Member end,
200 | lists:sort([{0, Event} | lists:filter(Filter, Events)]). % sorting keeps the lowest counts at the head of the queue
201 |
--------------------------------------------------------------------------------
/src/swim_failure.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | -module(swim_failure).
22 | -behavior(gen_server).
23 |
24 | -export([start_link/4]).
25 | -export([stop/0]).
26 | -export([probe/3]).
27 | -export([probe/4]).
28 |
29 | -export([init/1]).
30 | -export([handle_call/3]).
31 | -export([handle_cast/2]).
32 | -export([handle_info/2]).
33 | -export([code_change/3]).
34 | -export([terminate/2]).
35 |
36 | -record(state, {
37 | local_member :: swim:member(), % this node's own address; a PING naming it is answered with an ACK
38 | probe :: undefined | probe(), % the single in-flight probe, or undefined when none is running
39 | ping_reqs = #{} :: #{{swim:member(), sequence()} := ping_req()}, % pings sent on behalf of others, keyed by {Terminal, our sequence}
40 | nack_timeout :: non_neg_integer(), % timer value for proxied pings (used in handle_ping_req/4)
41 | ack_timeout :: non_neg_integer(), % timer value for proxied pings (used in handle_ping_req/4)
42 | socket :: undefined | inet:socket(),
43 | keyring :: swim_keyring:keyring(), % used to encrypt outgoing and decrypt incoming packets
44 | sequence = 0 :: sequence() % last sequence number used for an outgoing PING
45 | }).
46 |
47 | -record(probe, {
48 | target :: swim:member(), % member being probed
49 | sequence :: sequence(), % sequence number of the PING sent to target
50 | ack_timer :: reference(), % fires {ack_timeout, Target, Sequence}
51 | probe_timer :: reference(), % fires {probe_timeout, Target, Sequence}
52 | ack_fun :: fun((swim:member()) -> ok), % called with the responder when the matching ACK arrives
53 | missing_nacks = 0 :: non_neg_integer() % set to the number of proxies on ack timeout, reduced by one per matching NACK
54 | }).
55 |
56 | -record(ping_req, {
57 | origin :: swim:member(), % peer that sent us the PING-REQ
58 | sequence :: sequence(), % the origin's sequence number, echoed back in the forwarded ACK or NACK
59 | ack_timer :: reference(), % fires {ack_timeout, Terminal, Sequence}
60 | nack_timer :: reference() % fires {nack_timeout, Terminal, Sequence}
61 | }).
62 |
63 | -type ping_req() :: #ping_req{}.
64 | -type probe() :: #probe{}.
65 | -type sequence() :: non_neg_integer().
66 |
67 | -export_type([sequence/0]).
68 |
69 | -spec start_link(Member, KeyRing, AckTimeout, NackTimeout) -> {ok, pid()} when
70 | Member :: swim:member(),
71 | KeyRing :: swim_keyring:keyring(),
72 | AckTimeout :: pos_integer(),
73 | NackTimeout :: pos_integer().
74 |
75 | start_link(LocalMember, Keyring, AckTimeout, NackTimeout) -> % Starts the server registered locally under the module name.
76 | Args = [LocalMember, Keyring, AckTimeout, NackTimeout], % same order as matched in init/1
77 | gen_server:start_link({local, ?MODULE}, ?MODULE, Args, []).
78 |
79 | -spec stop() -> ok.
80 |
81 | stop() -> % Stops the locally registered server; terminate/2 closes the socket.
82 | gen_server:stop(?MODULE).
83 |
84 | -spec probe(Target, AckTimeout, ProbeTimeout) -> ok when
85 | Target :: swim:member(),
86 | AckTimeout :: pos_integer(),
87 | ProbeTimeout :: pos_integer().
88 |
89 | probe(Target, AckTimeout, ProbeTimeout) -> % probe/4 with swim_state:ack/1 as the acknowledgement callback
90 | probe(Target, AckTimeout, ProbeTimeout, fun swim_state:ack/1).
91 |
92 | -spec probe(Target, AckTimeout, ProbeTimeout, AckFun) -> ok when
93 | Target :: swim:member(),
94 | AckTimeout :: pos_integer(),
95 | ProbeTimeout :: pos_integer(),
96 | AckFun :: fun((swim:member()) -> ok).
97 |
98 | probe(Target, AckTimeout, ProbeTimeout, AckFun)
99 | when ProbeTimeout >= AckTimeout * 3 -> % a shorter probe timeout is rejected with function_clause
100 | Msg = {probe, Target, AckTimeout, ProbeTimeout, AckFun},
101 | gen_server:cast(?MODULE, Msg). % asynchronous: always returns ok, even when the server drops the request
102 |
103 | %% @private
104 | init([{_, Port} = LocalMember, Keyring, AckTimeout, NackTimeout]) -> % Opens the socket on the local member's port and builds the initial state.
105 | SocketOpts = [binary, {active, 16}], % {active, 16}: re-armed in handle_info/2 when udp_passive arrives
106 | {ok, Socket} = swim_socket:open(Port, SocketOpts), % a failed open crashes init with badmatch
107 | State = #state{
108 | local_member = LocalMember,
109 | keyring = Keyring,
110 | socket = Socket,
111 | ack_timeout = AckTimeout,
112 | nack_timeout = NackTimeout
113 | },
114 | {ok, State}.
115 |
116 | %% @private
117 | handle_call(_Msg, _From, State) ->          % no synchronous API exists; reply so a stray gen_server:call does not block until its timeout
118 |     {reply, {error, unknown_call}, State}.
119 |
120 | %% @private
121 | handle_cast({probe, Target, AckTimeout, ProbeTimeout, AckFun}, State)
122 | when State#state.probe =:= undefined -> % only one probe may be in flight at a time
123 | {noreply, send_probe(Target, AckTimeout, ProbeTimeout, AckFun, State)};
124 | handle_cast(_Msg, State) -> % everything else is dropped, including probe requests made while a probe is in flight
125 | {noreply, State}.
126 |
127 | %% @private
128 | handle_info({udp_passive, Socket}, #state{socket = Socket} = State) -> % socket went passive: re-arm it for another 16 packets
129 | ok = swim_socket:setopts(Socket, [{active, 16}]),
130 | {noreply, State};
131 | handle_info({udp, Socket, Ip, InPortNo, Packet}, #state{socket = Socket} = State) -> % datagram on our own socket; the sender becomes the peer
132 | {noreply, handle_packet(Packet, {Ip, InPortNo}, State)};
133 | handle_info({probe_timeout, Target, Sequence}, State) -> % timer messages created by the start_*_timer helpers
134 | {noreply, handle_probe_timeout(Target, Sequence, State)};
135 | handle_info({ack_timeout, Target, Sequence}, State) ->
136 | {noreply, handle_ack_timeout(Target, Sequence, State)};
137 | handle_info({nack_timeout, Target, Sequence}, State) ->
138 | {noreply, handle_nack_timeout(Target, Sequence, State)};
139 | handle_info(_Info, State) -> % anything else, including udp messages for a different socket, is ignored
140 | {noreply, State}.
141 |
142 | %% @private
143 | code_change(_OldVsn, State, _Extra) -> % no state migration: the state is carried over unchanged
144 | {ok, State}.
145 |
146 | %% @private
147 | terminate(_Reason, #state{socket = undefined}) -> % no socket to release
148 | ok;
149 | terminate(_Reason, #state{socket = Socket}) -> % close the socket opened in init/1
150 | swim_socket:close(Socket).
151 |
152 | send_probe(Target, AckTimeout, ProbeTimeout, AckFun, State) -> % Sends a PING to Target and records it as the in-flight probe.
153 | NextSequence = State#state.sequence + 1,
154 | Msg = {ping, NextSequence, Target},
155 | NewState = send(Target, Msg, State),
156 | AckTimer = start_ack_timer(AckTimeout, Target, NextSequence), % on expiry handle_ack_timeout/3 sends the PING-REQs
157 | ProbeTimer = start_probe_timer(ProbeTimeout, Target, NextSequence), % on expiry the probe is reported through swim_state:probe_timeout/2
158 | Probe = #probe{
159 | target = Target,
160 | sequence = NextSequence,
161 | ack_timer = AckTimer,
162 | probe_timer = ProbeTimer,
163 | ack_fun = AckFun
164 | },
165 | swim_metrics:notify({probe, Target}),
166 | NewState#state{probe = Probe, sequence = NextSequence}.
167 |
168 | handle_packet(Packet, Peer, State) -> % Decrypts and decodes one datagram, then dispatches it; returns the new state.
169 | case decrypt(Packet, State) of
170 | {ok, PlainText} ->
171 | try
172 | {Message, Events} = swim_messages:decode(PlainText),
173 | swim_metrics:notify({rx, iolist_size(Packet)}), % counts the size of the packet as received (still encrypted)
174 | spawn_link(fun() -> handle_events(Events) end), % piggybacked events are processed in a separate, linked process
175 | handle_message(Message, Peer, State)
176 | catch
177 | _:_ -> % NOTE(review): also swallows exceptions raised inside handle_message/3 (e.g. a failed send), not just decode errors
178 | State
179 | end;
180 | {error, failed_verification} -> % packets that fail verification are dropped silently
181 | State
182 | end.
183 |
184 | handle_message({ack, Sequence, Terminal}, _Peer, State) -> % one clause per message kind: emit a metric, then delegate
185 | swim_metrics:notify({ack, Terminal}),
186 | handle_ack(Sequence, Terminal, State);
187 | handle_message({nack, Sequence, Terminal}, Peer, State) ->
188 | swim_metrics:notify({nack, Terminal, Peer}),
189 | handle_nack(Sequence, Terminal, State);
190 | handle_message({ping, Sequence, Target}, Peer, State) ->
191 | swim_metrics:notify({ping, Peer}),
192 | handle_ping(Target, Sequence, Peer, State);
193 | handle_message({ping_req, Sequence, Terminal}, Peer, State) -> % Peer is the origin on whose behalf Terminal will be pinged
194 | swim_metrics:notify({ping_req, Terminal, Peer}),
195 | handle_ping_req(Sequence, Terminal, Peer, State).
196 |
197 | handle_ack(Sequence, Responder, #state{probe = Probe} = State) % ACK for our own probe; the guard fails when probe is undefined
198 | when Responder =:= Probe#probe.target andalso Probe#probe.sequence =:= Sequence ->
199 | #probe{ack_fun = AckFun, ack_timer = AckTimer, probe_timer = ProbeTimer} = Probe,
200 | AckFun(Responder),
201 | swim_time:cancel_timer(AckTimer, [{async, true}, {info, false}]),
202 | swim_time:cancel_timer(ProbeTimer, [{async, true}, {info, false}]),
203 | State#state{probe = undefined}; % probe finished: a new one may now be started
204 | handle_ack(Sequence, Responder, State) -> % otherwise: maybe the ACK for a PING we sent on behalf of another member
205 | case maps:take({Responder, Sequence}, State#state.ping_reqs) of
206 | {PingReq, PingReqs} ->
207 | Msg = {ack, PingReq#ping_req.sequence, Responder}, % forwarded with the origin's sequence number, not ours
208 | NewState = send(PingReq#ping_req.origin, Msg, State),
209 | swim_time:cancel_timer(PingReq#ping_req.ack_timer, [{async, true}, {info, false}]),
210 | swim_time:cancel_timer(PingReq#ping_req.nack_timer, [{async, true}, {info, false}]),
211 | NewState#state{ping_reqs = PingReqs};
212 | error -> % unknown or stale ACK
213 | State
214 | end.
215 |
216 | handle_nack(Sequence, Target, #state{probe = Probe} = State)   % NACK for the in-flight probe; the guard fails when probe is undefined
217 |   when Target =:= Probe#probe.target andalso Probe#probe.sequence =:= Sequence ->
218 |     #probe{missing_nacks = MissingNacks} = Probe,              % one fewer proxy is unaccounted for
219 |     State#state{probe = Probe#probe{missing_nacks = max(0, MissingNacks - 1)}}; % clamp: field is non_neg_integer() and duplicate NACKs must not drive it negative
220 | handle_nack(_Sequence, _Target, State) ->                      % stale or unrelated NACK
221 |     State.
222 |
223 | handle_ping(Target, Sequence, Peer, #state{local_member = Target} = State) -> % the PING names us: answer the sender with an ACK
224 | Msg = {ack, Sequence, Target},
225 | send(Peer, Msg, State);
226 | handle_ping(_Target, _Sequence, _Peer, State) -> % a PING naming some other member is ignored
227 | State.
228 |
229 | handle_ping_req(OriginSequence, Terminal, Origin, State) -> % Pings Terminal on behalf of Origin and remembers the request.
230 | NextSequence = State#state.sequence + 1, % the proxied PING gets a sequence number of our own
231 | Msg = {ping, NextSequence, Terminal},
232 | NewState = send(Terminal, Msg, State),
233 | NackTimer = start_nack_timer(State#state.nack_timeout, Terminal, NextSequence),
234 | AckTimer = start_ack_timer(State#state.ack_timeout, Terminal, NextSequence),
235 | PingReq = #ping_req{origin = Origin, sequence = OriginSequence,
236 | ack_timer = AckTimer, nack_timer = NackTimer},
237 | PingReqs = maps:put({Terminal, NextSequence}, PingReq, State#state.ping_reqs), % same key the ACK and timeout handlers look up
238 | NewState#state{ping_reqs = PingReqs, sequence = NextSequence}.
239 |
240 | handle_ack_timeout(Target, Sequence, #state{probe = Probe} = State) % our own probe got no direct ACK in time
241 | when Probe#probe.target =:= Target andalso Probe#probe.sequence =:= Sequence ->
242 | swim_metrics:notify({ack_timeout, Target}),
243 | swim_time:cancel_timer(Probe#probe.ack_timer, [{async, true}, {info, false}]),
244 | Msg = {ping_req, Sequence, Probe#probe.target},
245 | Proxies = swim_state:proxies(Target),
246 | NewState = lists:foldl(fun(Proxy, S) -> send(Proxy, Msg, S) end, State, Proxies), % ask every proxy to ping the target for us
247 | NewState#state{probe = Probe#probe{missing_nacks = length(Proxies)}}; % expect one NACK per proxy from here on
248 | handle_ack_timeout(Target, Sequence, State) -> % otherwise: a proxied PING went unanswered
249 | case maps:take({Target, Sequence}, State#state.ping_reqs) of
250 | {_, PingReqs} -> % forget the request; nothing is sent to the origin here
251 | swim_metrics:notify({ack_timeout, Target}),
252 | State#state{ping_reqs = PingReqs};
253 | error ->
254 | State
255 | end.
256 |
257 | handle_nack_timeout(Target, Sequence, State) ->             % a proxied PING is still unanswered: tell the origin with a NACK
258 |     case maps:find({Target, Sequence}, State#state.ping_reqs) of
259 |         {ok, #ping_req{origin = Origin, sequence = OriginSequence}} ->   % entry stays in ping_reqs: a late ACK is still forwarded
260 |             swim_metrics:notify({nack_timeout, Target, Origin}),
261 |             Msg = {nack, OriginSequence, Target},           % fix: name the probed terminal (was Origin), which is what handle_nack/3 matches on
262 |             send(Origin, Msg, State);
263 |         error ->                                            % request already answered or expired
264 |             State
265 |     end.
266 |
267 | handle_probe_timeout(Target, Sequence, #state{probe = Probe} = State) % the in-flight probe ran out of time without an ACK
268 | when Probe#probe.target =:= Target andalso Probe#probe.sequence =:= Sequence ->
269 | swim_metrics:notify({probe_timeout, Target}),
270 | swim_state:probe_timeout(Target, Probe#probe.missing_nacks), % reports how many expected NACKs never arrived
271 | State#state{probe = undefined}; % probe finished: a new one may now be started
272 | handle_probe_timeout(_Target, _Sequence, State) -> % timer for a probe that is no longer current
273 | State.
274 |
275 | handle_events(Events) ->
276 |     % Membership events go to swim_state first, then user events go to swim_subscriptions.
277 |     [swim_state:handle_event(Membership) || {membership, _} = Membership <- Events],
278 |     [swim_subscriptions:publish(User) || {user, _} = User <- Events],
279 |     % Both passes run for their side effects only; entries of any other shape are skipped.
280 |     ok.
281 |
282 | % Not sure if we need to handle the case when sending to the socket fails or if we can
283 | % just let it crash.
284 | send({DestIp, DestPort} = Target, Msg, State) -> % Encodes Msg with piggybacked events, encrypts it and sends it to Target.
285 | Events = swim_state:broadcasts(Target), % events to piggyback on this message
286 | Payload = encrypt(swim_messages:encode({Msg, Events}), State),
287 | ok = swim_socket:send(State#state.socket, DestIp, DestPort, Payload), % any non-ok result crashes with badmatch
288 | swim_metrics:notify({tx, iolist_size(Payload)}),
289 | State. % returned unchanged so callers can thread it through folds
290 |
291 | start_ack_timer(Timeout, Terminal, Sequence) -> % delivers {ack_timeout, Terminal, Sequence} to this process after Timeout
292 | swim_time:send_after(Timeout, self(), {ack_timeout, Terminal, Sequence}).
293 |
294 | start_nack_timer(Timeout, Terminal, Sequence) ->
295 | swim_time:send_after(Timeout, self(), {nack_timeout, Terminal, Sequence}).
296 |
297 | start_probe_timer(Timeout, Target, Sequence) ->
298 | swim_time:send_after(Timeout, self(), {probe_timeout, Target, Sequence}).
299 |
300 | encrypt(Msg, #state{keyring = Keyring}) -> % delegate to the keyring held in the state
301 |     swim_keyring:encrypt(Msg, Keyring).
302 |
303 | decrypt(CipherText, #state{keyring = Keyring}) ->
304 |     swim_keyring:decrypt(CipherText, Keyring).
305 |
--------------------------------------------------------------------------------
/src/swim_keyring.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitiations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | -module(swim_keyring).
22 |
23 | -export([new/1]).
24 | -export([new/2]).
25 | -export([add/2]).
26 | -export([encrypt/2]).
27 | -export([decrypt/2]).
28 |
29 | -define(AAD, crypto:hash(sha256, term_to_binary(erlang:get_cookie()))).
30 |
31 | -record(keyring, {
32 | keys :: nonempty_list(<<_:256>>),
33 | aad :: binary()
34 | }).
35 |
36 | -opaque keyring() :: #keyring{}.
37 | -export_type([keyring/0]).
38 |
39 | %% Build a keyring whose AAD is the ?AAD macro (a hash of the node cookie).
40 | new(Keys) -> new(Keys, ?AAD).
41 |
42 | %% Build a keyring from a non-empty key list and an explicit AAD; the head
43 | %% of the list is the key encrypt/2 uses.
44 | new([_ | _] = Keys, AAD) -> #keyring{aad = AAD, keys = Keys}.
45 |
46 | %% Prepend a 32-octet key, which makes it the key encrypt/2 picks.
47 | add(<<_:32/binary>> = Key, #keyring{keys = Keys} = KeyRing) ->
48 |     KeyRing#keyring{keys = [Key | Keys]}.
49 |
50 | %% @doc Encrypts the provided plain text using the Advanced Encryption Standard
51 | %% (AES) in Galois/Counter Mode (GCM) with the first 32-octet key of the keyring,
52 | %% the keyring's Additional Authenticated Data (AAD), and a randomly generated
53 | %% Initialization Vector (IV). The resulting payload is the 16-octet IV,
54 | %% the 16-octet CipherTag and the cipher text, concatenated in that order.
55 | %% @end
56 | -spec encrypt(PlainText, Keyring) -> CipherText when
57 |       PlainText :: iodata(),
58 |       Keyring :: keyring(),
59 |       CipherText :: iodata().
60 |
61 | encrypt(PlainText, #keyring{keys = [Key | _], aad = AAD}) ->
62 |     IV = crypto:strong_rand_bytes(16),
63 |     {CipherText, CipherTag} = crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, PlainText, AAD, true),
64 |     <<IV/binary, CipherTag/binary, CipherText/binary>>.
65 |
66 | %% @doc Verifies the authenticity of the payload and decrypts the ciphertext
67 | %% generated by {@link encrypt/2}. Every key in the keyring is tried in turn, so
68 | %% the ring must contain the key used by the sender. Decrypt is not responsible for
69 | %% decoding the underlying Swim protocol message -- see {@link swim_messages:decode/1}.
70 | -spec decrypt(CipherText, KeyRing) -> {ok, PlainText} | {error, failed_verification} when
71 |       CipherText :: binary(),
72 |       KeyRing :: keyring(),
73 |       PlainText :: binary().
74 |
75 | decrypt(<<IV:16/binary, CipherTag:16/binary, CipherText/binary>>, Keyring) ->
76 |     #keyring{keys = Keys, aad = AAD} = Keyring,
77 |     decrypt_loop(Keys, AAD, IV, CipherTag, CipherText);
78 | decrypt(_CipherText, _KeyRing) ->
79 |     {error, failed_verification}.
80 |
81 | %% Try each key in order; the first one that authenticates the payload wins.
82 | decrypt_loop([Key | Rest], AAD, IV, CipherTag, CipherText) ->
83 |     Result = crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, CipherText, AAD, CipherTag, false),
84 |     case Result of
85 |         error -> decrypt_loop(Rest, AAD, IV, CipherTag, CipherText);
86 |         PlainText -> {ok, PlainText}
87 |     end;
88 | decrypt_loop([], _AAD, _IV, _CipherTag, _CipherText) ->
89 |     {error, failed_verification}.
90 |
91 |
--------------------------------------------------------------------------------
/src/swim_membership.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017. All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitiations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | %%% @doc This module is responsible for maintaining the list and status of non
22 | %%% faulty members in a gossip group through the use of the Suspicion Mechanism
23 | %%% described in the SWIM Paper. A `swim_membership' process becomes aware of
24 | %%% membership changes through exported functions defined for a specific member
25 | %%% status, {@link alive/3}, {@link suspect/4}, {@link faulty/4}, as determined
26 | %%% by the Failure Detection mechanism of SWIM implemented in {@link swim}. Member
27 | %%% state includes the locally known status of a member as well as a logical clock
28 | %%% for the member's status known in the SWIM paper as the incarnation.
29 | %%% When the status of a member changes events are sent to {@link swim_broadcasts}
30 | %%% to be broadcast to the rest of the members in the gossip group.
31 | %%%
32 | %%% @end
33 |
34 | -module(swim_membership).
35 |
36 | -export([new/5]).
37 | -export([local_member/1]).
38 | -export([local_state/1]).
39 | -export([members/1]).
40 | -export([probe_target/1]).
41 | -export([proxies/3]).
42 | -export([size/1]).
43 | -export([refuted/2]).
44 | -export([alive/3]).
45 | -export([suspect/4]).
46 | -export([faulty/4]).
47 | -export([handle_event/2]).
48 |
49 | -record(membership, {
50 | local_member :: swim:member(),
51 | incarnation = 0 :: swim:incarnation(),
52 | members = #{} :: #{swim:member() := state()},
53 | faulty = ordsets:new() :: ordsets:ordset(swim:member()),
54 | probe_targets = [] :: [swim:member()],
55 | alpha :: pos_integer(),
56 | beta :: pos_integer(),
57 | protocol_period :: pos_integer(),
58 | suspicion_factor :: pos_integer()
59 | }).
60 |
61 | -record(alive, {
62 | incarnation = 0 :: swim:incarnation(),
63 | last_modified :: integer()
64 | }).
65 |
66 | -record(suspect, {
67 | incarnation :: swim:incarnation(),
68 | suspecting = ordsets:new() :: ordsets:ordset(swim:member()),
69 | tref :: reference(),
70 | last_modified :: integer(),
71 | min :: float(),
72 | max :: float(),
73 | k :: non_neg_integer(),
74 | timeout :: pos_integer()
75 | }).
76 |
77 | -type state() :: alive() | suspect().
78 | -type alive() :: #alive{}.
79 | -type suspect() :: #suspect{}.
80 |
81 | -opaque membership() :: #membership{}.
82 | -export_type([membership/0]).
83 |
84 | %% Create a membership for LocalMember; the remaining record fields
85 | %% (incarnation, members, faulty, probe_targets) keep their defaults.
86 | new(LocalMember, Alpha, Beta, ProtocolPeriod, SuspicionFactor) ->
87 |     #membership{suspicion_factor = SuspicionFactor,
88 |                 protocol_period = ProtocolPeriod,
89 |                 beta = Beta,
90 |                 alpha = Alpha,
91 |                 local_member = LocalMember}.
92 |
93 | %% @doc Counts the members of the gossip group: every known member plus the local one
94 | -spec size(Membership) -> NumMembers when
95 |       Membership :: membership(),
96 |       NumMembers :: non_neg_integer().
97 |
98 | size(#membership{members = Known}) ->
99 |     1 + map_size(Known).
100 |
101 | -spec members(Membership) -> Members when
102 | Membership :: membership(),
103 | Members :: [{swim:member(), alive | suspect, swim:incarnation()}].
104 |
105 | members(#membership{members = Known}) ->
106 |     StatusOf = fun(#alive{incarnation = Inc}) -> {alive, Inc};
107 |                   (#suspect{incarnation = Inc}) -> {suspect, Inc}
108 |                end,
109 |     Collect = fun(M, St, Acc) -> {Status, Inc} = StatusOf(St), [{M, Status, Inc} | Acc] end,
110 |     maps:fold(Collect, [], Known).
111 |
112 | %% @doc Returns the identifier stored for the local member
113 | -spec local_member(Membership) -> Member when
114 |       Membership :: membership(),
115 |       Member :: swim:member().
116 |
117 | local_member(#membership{local_member = Self}) ->
118 |     Self.
119 |
120 | -spec local_state(Membership) -> Events when
121 | Membership :: membership(),
122 | Events :: [swim:membership_event()].
123 |
124 | local_state(#membership{local_member = Self, incarnation = OwnInc, members = Known}) ->
125 |     ToEvent = fun(Member, #suspect{incarnation = Inc}, Acc) ->
126 |                       [{membership, {suspect, Inc, Member, Self}} | Acc];
127 |                  (Member, #alive{incarnation = Inc}, Acc) ->
128 |                       [{membership, {alive, Inc, Member}} | Acc]
129 |               end,
130 |     %% Our own alive event comes first, followed by one event per known member.
131 |     [{membership, {alive, OwnInc, Self}} | maps:fold(ToEvent, [], Known)].
132 |
133 | -spec probe_target(Membership0) -> none | {Target, Membership} when
134 | Membership0 :: membership(),
135 | Target :: {swim:member(), swim:incarnation()},
136 | Membership :: membership().
137 |
138 | %% Next member to probe, taken from a shuffled queue; `none' when no members are known.
139 | probe_target(#membership{probe_targets = [], members = Members}) when map_size(Members) =:= 0 ->
140 |     none;
141 | probe_target(#membership{probe_targets = [], members = Members} = Membership) ->
142 |     Keyed = [{rand:uniform(), M} || M <- maps:keys(Members)],
143 |     Shuffled = [M || {_, M} <- lists:keysort(1, Keyed)],
144 |     probe_target(Membership#membership{probe_targets = Shuffled});
145 | probe_target(#membership{probe_targets = [T | Targets]} = Membership) ->
146 |     Rest = Membership#membership{probe_targets = Targets},
147 |     case maps:find(T, Membership#membership.members) of
148 |         {ok, #alive{incarnation = Inc}} -> {{T, Inc}, Rest};
149 |         {ok, #suspect{incarnation = Inc}} -> {{T, Inc}, Rest};
150 |         %% T is no longer a member: skip it and return the recursive result unwrapped.
151 |         error -> probe_target(Rest)
152 |     end.
153 |
154 | -spec proxies(Num, Target, Membership) -> Proxies when
155 | Num :: pos_integer(),
156 | Target :: swim:member(),
157 | Membership :: membership(),
158 | Proxies :: [swim:member()].
159 |
160 | proxies(Num, Target, #membership{members = Known}) ->
161 |     Keyed = [{rand:uniform(), Member} || Member <- maps:keys(Known)],
162 |     Candidates = [M || {_, M} <- lists:keysort(1, Keyed), M =/= Target],
163 |     lists:sublist(Candidates, Num).
164 |
165 | -spec handle_event(Event, Membership0) -> {Events, Membership} when
166 | Event :: swim:membership_event(),
167 | Membership0 :: membership(),
168 | Events :: [swim:membership_event()],
169 | Membership :: membership().
170 |
171 | %% Route a membership event to the matching status function; anything else is a no-op.
172 | handle_event(Event, Membership) ->
173 |     case Event of
174 |         {membership, {alive, Inc, Member}} -> alive(Member, Inc, Membership);
175 |         {membership, {suspect, Inc, Member, From}} -> suspect(Member, Inc, From, Membership);
176 |         {membership, {faulty, Inc, Member, From}} -> faulty(Member, Inc, From, Membership);
177 |         _Other -> {[], Membership}
178 |     end.
179 |
180 | %% @doc Set the member status to alive
181 | %%
182 | %% If the member isn't known it's added to the membership and an event is
183 | %% broadcast to notify the group of the alive member. If the member is known and the incarnation is
184 | %% greater than the current incarnation of the member we update the incarnation
185 | %% of the member; an event is broadcast only if the member was suspected. If the member is the local
186 | %% member and the incarnation is greater than our current incarnation we refute the alive message by
187 | %% setting our incarnation to 1 + the received incarnation and then broadcast a new alive message.
188 | %% If none of the above conditions are met we do nothing.
189 | %% @end
190 | -spec alive(Member, Incarnation, Membership0) -> {Events, Membership} when
191 | Member :: swim:member(),
192 | Incarnation :: swim:incarnation(),
193 | Membership0 :: membership(),
194 | Events :: [swim:membership_event()],
195 | Membership :: membership().
196 |
197 | alive(Member, Incarnation, Membership)
198 | when Member =:= Membership#membership.local_member andalso
199 | Incarnation =< Membership#membership.incarnation ->
200 | {[], Membership}; % nothing newer than what we already hold about ourselves
201 | alive(Member, Incarnation, Membership)
202 | when Member =:= Membership#membership.local_member andalso
203 | Incarnation > Membership#membership.incarnation ->
204 | refute(Incarnation, Membership); % a newer incarnation of us is circulating: outbid it
205 | alive(Member, Incarnation, Membership) ->
206 | #membership{members = CurrentMembers, faulty = Faulty} = Membership,
207 | case maps:find(Member, CurrentMembers) of
208 | error -> % unknown member: store it as alive, queue it for probing, emit an alive event
209 | State = #alive{incarnation = Incarnation,
210 | last_modified = swim_time:monotonic_time()},
211 | ProbeTargets = Membership#membership.probe_targets ++ [Member],
212 | Events = [{membership, {alive, Incarnation, Member}}],
213 | NewMembers = maps:put(Member, State, CurrentMembers),
214 | {Events, Membership#membership{
215 | members = NewMembers,
216 | probe_targets = ProbeTargets,
217 | faulty = ordsets:del_element(Member, Faulty)}}; % drop it from the faulty set if present
218 | {ok, #suspect{} = Suspect}
219 | when Incarnation > Suspect#suspect.incarnation -> % strictly newer incarnation clears the suspicion
220 | swim_time:cancel_timer(Suspect#suspect.tref, [{async, true}, {info, false}]), % stop the suspicion timer
221 | Alive = #alive{
222 | incarnation = Incarnation,
223 | last_modified = swim_time:monotonic_time()
224 | },
225 | NewMembers = maps:put(Member, Alive, CurrentMembers),
226 | Events = [{membership, {alive, Incarnation, Member}}],
227 | {Events, Membership#membership{members = NewMembers}};
228 | {ok, #alive{incarnation = CurrentInc} = Alive0}
229 | when Incarnation > CurrentInc -> % already alive: record the newer incarnation, emit no event
230 | Alive = Alive0#alive{incarnation = Incarnation,
231 | last_modified = swim_time:monotonic_time()},
232 | {[], Membership#membership{members = maps:put(Member, Alive, CurrentMembers)}};
233 | {ok, _} -> % same or older incarnation: ignore
234 | {[], Membership}
235 | end.
236 |
237 | %% @doc Set the member status to suspect
238 | %%
239 | %% If the member isn't already known we do nothing. If the member is known
240 | %% we update the status and broadcast the change under the following conditions. If
241 | %% the current status of the member is alive and the incarnation is greater than
242 | %% or equal to the known incarnation of the member, we update the member's status
243 | %% to suspect and broadcast the change. If the current status of the member is
244 | %% suspect and the incarnation is greater than or equal to the known incarnation, we
245 | %% record the provided incarnation; the change is broadcast only when it comes from
246 | %% a member not yet suspecting and fewer than `k' members are already suspecting.
247 | %% If the suspected member is the local member we refute by incrementing our own
248 | %% incarnation and broadcasting the change to the group.
249 | %% @end
250 | -spec suspect(Member, Incarnation, From, Membership0) -> {Events, Membership} when
251 | Member :: swim:member(),
252 | Incarnation :: swim:incarnation(),
253 | From :: local | swim:member(),
254 | Membership0 :: membership(),
255 | Events :: [swim:membership_event()],
256 | Membership :: membership().
257 |
258 | suspect(Member, Incarnation, _From, Membership)
259 | when Member =:= Membership#membership.local_member ->
260 | refute(Incarnation, Membership); % we are the one being suspected: refute
261 | suspect(Member, Incarnation, local, Membership) ->
262 | #membership{local_member = From} = Membership, % `local' stands for our own member id
263 | suspect(Member, Incarnation, From, Membership);
264 | suspect(Member, Incarnation, From, Membership) ->
265 | #membership{members = CurrentMembers, local_member = LocalMember} = Membership,
266 | case maps:find(Member, CurrentMembers) of
267 | {ok, #suspect{suspecting = Suspecting, incarnation = CurrentIncarnation, k = K} = Suspect}
268 | when Incarnation >= CurrentIncarnation ->
269 | case {ordsets:is_element(From, Suspecting), ordsets:size(Suspecting) < K} of
270 | {false, true} -> % new suspecter and fewer than K so far: restart the timer with a new timeout
271 | Elapsed = swim_time:cancel_timer(Suspect#suspect.tref), % NOTE(review): named Elapsed but consumed as `Remaining' by remaining_suspicion_time/2
272 | Timeout = remaining_suspicion_time(Elapsed, Suspect),
273 | TRef = start_timer(Timeout, Member, Incarnation),
274 | NewState = Suspect#suspect{
275 | suspecting = ordsets:add_element(From, Suspecting),
276 | incarnation = Incarnation,
277 | tref = TRef,
278 | last_modified = swim_time:monotonic_time(),
279 | timeout = Timeout},
280 | NewMembers = maps:put(Member, NewState, CurrentMembers),
281 | Events = [{membership, {suspect, Incarnation, Member, From}}],
282 | {Events, Membership#membership{members = NewMembers}};
283 | _ -> % From already counted, or K reached: only record the incarnation, keep the running timer
284 | NewState = Suspect#suspect{incarnation = Incarnation,
285 | last_modified = swim_time:monotonic_time()},
286 | NewMembers = maps:put(Member, NewState, CurrentMembers),
287 | {[], Membership#membership{members = NewMembers}}
288 | end;
289 | {ok, #alive{incarnation = CurrentIncarnation}}
290 | when Incarnation >= CurrentIncarnation -> % alive -> suspect: start the suspicion timer
291 | {Min, Max, K, Timeout} = initial_suspicion_timeout(Membership),
292 | TRef = start_timer(Timeout, Member, Incarnation),
293 | NewState = #suspect{
294 | incarnation = Incarnation,
295 | suspecting = ordsets:from_list([From]),
296 | tref = TRef,
297 | last_modified = swim_time:monotonic_time(),
298 | min = Min,
299 | max = Max,
300 | k = K,
301 | timeout = Timeout
302 | },
303 | NewMembers = maps:put(Member, NewState, CurrentMembers),
304 | Events = [{membership, {suspect, Incarnation, Member, LocalMember}}], % NOTE(review): event names LocalMember while `suspecting' stores From -- confirm this is intended
305 | {Events, Membership#membership{members = NewMembers}};
306 | _ -> % unknown member or older incarnation: ignore
307 | {[], Membership}
308 | end.
309 |
310 | start_timer(After, Member, Incarnation) -> % asks swim_time to send a suspicion_timeout to self()
311 |     swim_time:send_after(After, self(), {suspicion_timeout, Member, Incarnation}).
312 |
313 | %% Timeout still to run after one more confirmation, given the time Remaining on the old timer.
314 | remaining_suspicion_time(Remaining, Suspect) ->
315 |     #suspect{suspecting = Suspecting, k = K, min = Min, max = Max, timeout = Total} = Suspect,
316 |     Frac = math:log(ordsets:size(Suspecting) + 1) / math:log(K + 1),
317 |     Timeout = floor(max(Min, Max - (Max - Min) * Frac)),
318 |     max(0, Timeout - (Total - Remaining)). % clamp: elapsed time may exceed the shortened timeout
319 |
320 | %% Returns {Min, Max, K, Timeout} for a member that has just become suspect.
321 | initial_suspicion_timeout(#membership{members = Known, alpha = Alpha, beta = Beta,
322 |         protocol_period = Period, suspicion_factor = Factor}) ->
323 |     N = maps:size(Known),
324 |     Min = Alpha * max(1, math:log(N)) * Period,
325 |     Max = Beta * Min,
326 |     % If there aren't enough members in the group excluding ourselves and the suspected member we
327 |     % won't expect any additional suspicions so we immediately set the timeout to Min.
328 |     K = if
329 |             N < Factor - 2 -> 0;
330 |             true -> Factor
331 |         end,
332 |     % With no confirmations expected (K < 1) the timer starts at Min, otherwise at Max.
333 |     Timeout = if K < 1 -> Min; true -> Max end,
334 |     {Min, Max, K, floor(Timeout)}.
335 |
336 | %% @doc Remove the member from the group
337 | %%
338 | %% If the member isn't already known we do nothing. If the member is currently
339 | %% suspected we remove it and broadcast the change, provided the given incarnation is
340 | %% greater than or equal to the current incarnation of the member.
341 | %% @end
342 | -spec faulty(Member, Incarnation, From, Membership0) -> {Events, Membership} when
343 | Member :: swim:member(),
344 | Incarnation :: swim:incarnation(),
345 | From :: local | swim:member(),
346 | Membership0 :: membership(),
347 | Events :: [swim:membership_event()],
348 | Membership :: membership().
349 |
350 | %% A faulty claim about ourselves is refuted; `local' is resolved to our own member id.
351 | faulty(Member, Incarnation, _From, Membership) when Member =:= Membership#membership.local_member ->
352 |     refute(Incarnation, Membership);
353 | faulty(Member, Incarnation, local, Membership) ->
354 |     faulty(Member, Incarnation, Membership#membership.local_member, Membership);
355 | faulty(Member, Incarnation, From, Membership) ->
356 |     #membership{members = CurrentMembers, faulty = Faulty} = Membership,
357 |     case maps:find(Member, CurrentMembers) of
358 |         {ok, #suspect{incarnation = CurrentIncarnation, tref = TRef}}
359 |           when Incarnation >= CurrentIncarnation ->
360 |             %% The member is removed: cancel its suspicion timer (as alive/3 does) so no stale timeout fires.
361 |             swim_time:cancel_timer(TRef, [{async, true}, {info, false}]),
362 |             {[{membership, {faulty, Incarnation, Member, From}}],
363 |              Membership#membership{members = maps:remove(Member, CurrentMembers),
364 |                                    faulty = ordsets:add_element(Member, Faulty)}};
365 |         _ -> {[], Membership}
366 |     end.
367 |
368 | %% True when Events contains an alive event about the local member, which is
369 | %% the kind of event refute/2 emits.
370 | refuted(Events, Membership) ->
371 |     IsOwnAlive = fun({membership, {alive, _Inc, Member}})
372 |                        when Membership#membership.local_member =:= Member -> true;
373 |                     (_Event) -> false end,
374 |     lists:any(IsOwnAlive, Events).
375 |
376 | %% @private
377 | refute(Incarnation, #membership{local_member = Self, incarnation = Current} = Membership) ->
378 |     if
379 |         Incarnation >= Current ->
380 |             %% Go one past the claimed incarnation and remember it as our own.
381 |             Bumped = Incarnation + 1,
382 |             {[{membership, {alive, Bumped, Self}}], Membership#membership{incarnation = Bumped}};
383 |         true -> {[{membership, {alive, Current, Self}}], Membership}
384 |     end.
385 |
--------------------------------------------------------------------------------
/src/swim_messages.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017. All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitiations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | %%% @doc This module is responsible for encoding and decoding SWIM protocol
22 | %%% messages.
23 | %%%
24 | %%% SWIM protocol message encodings can be found in the documentation
25 | %%% corresponding to the various encoding functions defined in this module.
26 | %%% {@link encode_ack/3}, {@link encode_ping/3}, {@link encode_ping_req/2},
27 | %%% {@link encode_leave/1}.
28 | %%% All SWIM protocol messages are prefixed with a single octet reflecting
29 | %%% the protocol version of the message. The overall format of SWIM messages is:
30 | %%%
31 | %%% <table border="1">
32 | %%%   <tr>
33 | %%%     <td>1</td>
34 | %%%     <td>1</td>
35 | %%%     <td>N</td>
36 | %%%   </tr>
37 | %%%   <tr>
38 | %%%     <td>Version</td>
39 | %%%     <td>Tag</td>
40 | %%%     <td>Data</td>
41 | %%%   </tr>
42 | %%% </table>
43 | %%% - __*Version*__ : is the protocol version the message is encoded for
44 | %%% - __*Tag*__ : indicates what type of SWIM message Data represents; ACK, NACK,
45 | %%% PING, or PING-REQ
46 | %%% - __*Data*__ : The SWIM messages payload
47 | %%%
48 | %%% All SWIM messages are encrypted over the wire using AES-256-GCM. See
49 | %%% {@link swim_keyring:encrypt/2} for more information.
50 | %%% @end
51 | -module(swim_messages).
52 |
53 | -export([encode/1]).
54 | -export([decode/1]).
55 | -export([encode_event/1]).
56 | -export([event_size_limit/0]).
57 |
58 | -define(HEADER, 1).
59 |
60 | -type ack() :: {ack, swim_failure:sequence(), swim:member()}.
61 | -type nack() :: {nack, swim_failure:sequence(), swim:member()}.
62 | -type ping() :: {ping, swim_failure:sequence(), swim:member()}.
63 | -type ping_req() :: {ping_req, swim_failure:sequence(), swim:member()}.
64 | -type swim_message() :: ack() | nack() | ping() | ping_req().
65 |
66 | -export_type([swim_message/0]).
67 |
68 | %% @doc Event size limit determines the maximum size (in octets) available to
69 | %% to piggyback membership and user events on an ACK or PING message.
70 | %%
71 | %% The max message size we use is the minimum reassembly buffer size defined for
72 | %% IPv4 to avoid IP fragmentation -- 576 octets.
73 | %% UDP has an overhead of a 20 octet IP header and an 8 octet
74 | %% UDP header. A PING/ACK/PING-REQ are each 13 octets plus 16 octets for the
75 | %% nonce, and 16 octets for the CipherTag for a minimum size of 84 bytes. That leaves
76 | %% 492 octets for the events.
77 | %% A membership event is 42 octets which equates to a maximum of 11 membership events per
78 | %% ACK/PING/PING-REQ message. A user event can be a maximum of 492 octets.
79 | %% @end
80 | -spec event_size_limit() -> non_neg_integer().
81 | event_size_limit() ->
82 | 452. % NOTE(review): the comment above derives 492 octets for events; confirm which value is intended
83 |
84 | encode({{ack, Sequence, Member}, Events}) -> % iolist: version, tag, 32-bit sequence, member, events
85 |     [?HEADER, $a, <<Sequence:32/integer>>, encode_member(Member), encode_events(Events)];
86 | encode({{nack, Sequence, Member}, Events}) ->
87 |     [?HEADER, $n, <<Sequence:32/integer>>, encode_member(Member), encode_events(Events)];
88 | encode({{ping, Sequence, Member}, Events}) ->
89 |     [?HEADER, $p, <<Sequence:32/integer>>, encode_member(Member), encode_events(Events)];
90 | encode({{ping_req, Sequence, Member}, Events}) ->
91 |     [?HEADER, $r, <<Sequence:32/integer>>, encode_member(Member), encode_events(Events)].
92 |
93 | %% No events encode to nothing; otherwise the event count precedes the encoded events.
94 | %% NOTE(review): the count width is missing from this listing; 8 bits assumed -- must match decode_events/1.
95 | encode_events([]) -> [];
96 | encode_events(Events) ->
97 |     [<<(length(Events)):8/integer>>, encode_es(Events)].
98 |
99 | %% Encode each event in order; an empty list yields an empty list.
100 | encode_es(Events) ->
101 |     [encode_event(Event)
102 |      || Event <- Events].
103 |
104 | %% @doc Encode either a membership event or a user event.
105 | %%
106 | %% A membership event is encoded as follows:
107 | %% <table border="1">
108 | %%   <tr>
109 | %%     <td>1</td>
110 | %%     <td>1</td>
111 | %%     <td>6</td>
112 | %%     <td>4</td>
113 | %%   </tr>
114 | %%   <tr>
115 | %%     <td>50</td>
116 | %%     <td>Status</td>
117 | %%     <td>Member</td>
118 | %%     <td>Incarnation</td>
119 | %%   </tr>
120 | %% </table>
121 | %% <dl>
122 | %%   <dt>Status</dt>
123 | %%   <dd>is observed status of the Member being broadcast to the group</dd>
124 | %%   <dt>Member</dt>
125 | %%   <dd>is the subject of this membership event</dd>
126 | %%   <dt>Incarnation</dt>
127 | %%   <dd>is the incarnation of the subject Member known by the sender of this
128 | %%   event. See {@link swim_membership} for more information on Incarnations.</dd>
129 | %% </dl>
130 | %%
131 | %% A user event is encoded as follows:
132 | %% <table border="1">
133 | %%   <tr>
134 | %%     <td>1</td>
135 | %%     <td>2</td>
136 | %%     <td>Size</td>
137 | %%   </tr>
138 | %%   <tr>
139 | %%     <td>51</td>
140 | %%     <td>Size</td>
141 | %%     <td>Erlang Term</td>
142 | %%   </tr>
143 | %% </table>
144 | %% @end
145 | -spec encode_event(Event) -> iolist() when Event :: swim:swim_event().
146 |
147 | encode_event({membership, {suspect, Incarnation, Target, From}}) -> % 32-bit incarnation, as read by decode_es/2
148 |     [$m, $s, <<Incarnation:32/integer>>, encode_member(Target), encode_member(From)];
149 | encode_event({membership, {alive, Incarnation, Target}}) ->
150 |     [$m, $a, <<Incarnation:32/integer>>, encode_member(Target)];
151 | encode_event({membership, {faulty, Incarnation, Target, From}}) ->
152 |     [$m, $f, <<Incarnation:32/integer>>, encode_member(Target), encode_member(From)];
153 | encode_event({user, Bin}) when is_binary(Bin) ->
154 |     [$u, <<(byte_size(Bin)):16/integer>>, Bin].
155 |
156 | %% @doc Encodes a Member as the IP address and port number combination.
157 | %%
158 | %% <table border="1">
159 | %%   <tr>
160 | %%     <td>1</td>
161 | %%     <td>Size</td>
162 | %%     <td>2</td>
163 | %%   </tr>
164 | %%   <tr>
165 | %%     <td>Size</td>
166 | %%     <td>IP Address</td>
167 | %%     <td>Port Number</td>
168 | %%   </tr>
169 | %% </table>
170 | %% <dl>
171 | %%   <dt>IP Address</dt>
172 | %%   <dd>is the IPv4 or IPv6 address the Member can be reached</dd>
173 | %%   <dt>Port Number</dt>
174 | %%   <dd>is the associated Port Number the Member is listening on</dd>
175 | %% </dl>
176 | %% @end
177 | -spec encode_member(Member) -> binary() when Member :: swim:member().
178 |
179 | encode_member({{B1, B2, B3, B4}, PortNo}) ->
180 |     Addr = <<B1, B2, B3, B4>>,
181 |     <<6, Addr/binary, PortNo:16>>;
182 | encode_member({{W1, W2, W3, W4, W5, W6, W7, W8}, PortNo}) ->
183 |     Addr = << <<W:16>> || W <- [W1, W2, W3, W4, W5, W6, W7, W8] >>,
184 |     %% The leading size octet counts the 16 address octets plus the 2 port octets.
185 |     <<18, Addr/binary, PortNo:16>>.
186 |
187 | %% @doc Decodes the provided message from a binary to an Erlang Term.
188 | %%
189 | %% All messages are prefixed with a single octet to indicate the version of
190 | %% of the protocol. The return value is an Erlang term of the message. If the
191 | %% version is not supported or the message is malformed, an exception is thrown.
192 | %% @end
193 | -spec decode(Packet) -> Result when
194 | Packet :: binary(),
195 | Result :: {swim_message(), [swim:swim_event()]} | no_return().
196 |
197 | decode(<<?HEADER, $a, Sequence:32/integer, L:8, Member:L/binary, Events/binary>>) -> % mirrors encode/1
198 |     {{ack, Sequence, decode_member(Member)}, decode_events(Events)};
199 | decode(<<?HEADER, $n, Sequence:32/integer, L:8, Member:L/binary, Events/binary>>) ->
200 |     {{nack, Sequence, decode_member(Member)}, decode_events(Events)};
201 | decode(<<?HEADER, $r, Sequence:32/integer, L:8, Member:L/binary, Events/binary>>) ->
202 |     {{ping_req, Sequence, decode_member(Member)}, decode_events(Events)};
203 | decode(<<?HEADER, $p, Sequence:32/integer, L:8, Member:L/binary, Events/binary>>) ->
204 |     {{ping, Sequence, decode_member(Member)}, decode_events(Events)}.
205 |
206 | decode_member(<<A1, A2, A3, A4, Port:16/integer>>) -> % input is the member without its size octet
207 |     {{A1, A2, A3, A4}, Port};
208 | decode_member(<<A1:16/integer, A2:16/integer, A3:16/integer, A4:16/integer,
209 |                 A5:16/integer, A6:16/integer, A7:16/integer, A8:16/integer,
210 |                 Port:16/integer>>) ->
211 |     {{A1, A2, A3, A4, A5, A6, A7, A8}, Port}.
212 |
213 | decode_events(<<>>) ->
214 |     [];
215 | decode_events(<<L:8/integer, Events/binary>>) -> % NOTE(review): count width assumed 8 bits -- must match encode_events/1
216 |     decode_es(L, Events).
217 |
218 | %% Decode exactly K events; K must reach 0 just as the input runs out.
219 | decode_es(0, <<>>) -> [];
220 | decode_es(K, <<$u, Size:16/integer, Payload:Size/binary, Rest/binary>>) ->
221 |     [{user, Payload} | decode_es(K - 1, Rest)];
222 | decode_es(K, <<$m, $a, Inc:32/integer, TSize:8, Target:TSize/binary, Rest/binary>>) ->
223 |     [{membership, {alive, Inc, decode_member(Target)}} | decode_es(K - 1, Rest)];
224 | decode_es(K, <<$m, $s, Inc:32/integer, TSize:8, Target:TSize/binary, FSize:8, From:FSize/binary, Rest/binary>>) ->
225 |     [{membership, {suspect, Inc, decode_member(Target), decode_member(From)}} | decode_es(K - 1, Rest)];
226 | decode_es(K, <<$m, $f, Inc:32/integer, TSize:8, Target:TSize/binary, FSize:8, From:FSize/binary, Rest/binary>>) ->
227 |     [{membership, {faulty, Inc, decode_member(Target), decode_member(From)}} | decode_es(K - 1, Rest)].
228 |
--------------------------------------------------------------------------------
/src/swim_metrics.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitiations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
21 | -module(swim_metrics).
22 | -behavior(gen_event).
23 |
24 | -export([start_link/0]).
25 | -export([notify/1]).
26 | -export([subscribe/1]).
27 | -export([unsubscribe/1]).
28 |
29 | -export([init/1]).
30 | -export([handle_event/2]).
31 | -export([handle_call/2]).
32 | -export([handle_info/2]).
33 | -export([code_change/3]).
34 | -export([terminate/2]).
35 |
36 | start_link() -> % starts the gen_event manager, locally registered under the module name
37 | gen_event:start_link({local, ?MODULE}).
38 |
39 | subscribe(Pid) -> % handler id {?MODULE, Pid} gives every subscriber its own handler
40 |     gen_event:add_handler(?MODULE, {?MODULE, Pid}, [Pid]).
41 |
42 | unsubscribe(Pid) -> % removes exactly Pid's handler rather than the first ?MODULE handler found
43 |     gen_event:delete_handler(?MODULE, {?MODULE, Pid}, [Pid]).
44 |
45 | notify(Event) -> % hands Event to the manager via gen_event:notify/2
46 | gen_event:notify(?MODULE, Event).
47 |
48 | init([Subscriber]) -> % handler state is the subscriber pid
49 |     _MRef = erlang:monitor(process, Subscriber),
50 |     {ok, Subscriber}.
51 |
52 | handle_event(Event, Subscriber) -> % forward every metric event unchanged
53 |     erlang:send(Subscriber, Event),
54 |     {ok, Subscriber}.
55 |
56 | handle_call(Request, State) -> % replies with the request itself; state is unchanged
57 | {ok, Request, State}.
58 |
59 | handle_info({'DOWN', _MRef, process, Subscriber, _Reason}, Subscriber) ->
60 | remove_handler;
61 | handle_info(_Info, State) ->
62 | {ok, State}.
63 |
64 | code_change(_OldVsn, State, _Extra) ->
65 | {ok, State}.
66 |
67 | terminate(_Reason, _State) ->
68 | ok.
69 |
--------------------------------------------------------------------------------
/src/swim_pushpull.erl:
--------------------------------------------------------------------------------
-module(swim_pushpull).
-behavior(gen_server).

%% API
-export([join/2]).
-export([start_link/3]).
-export([accept/4]).

%% gen_server callbacks
-export([init/1]).
-export([handle_call/3]).
-export([handle_cast/2]).
-export([handle_info/2]).
-export([code_change/3]).
-export([terminate/2]).

-record(state, {
          %% Listen socket as returned by swim_socket:listen/3 (tagged tuple).
          socket       :: {tcp, inet:socket()} | {ssl, ssl:sslsocket()},
          %% Private ETS set holding one `{Pid}' row per live acceptor.
          acceptors    :: ets:tab(),
          local_member :: swim:member(),
          opts         :: map()
         }).

%% @doc Performs a push-pull state exchange with `Member' (an `{Ip, Port}'
%% tuple): connects, sends the local member as alive, waits for the remote
%% state and feeds every received event to `swim_state:handle_event/1'.
%%
%% Options read from `Opts': `transport' (default `tcp'), `transport_opts'
%% (default `[]', appended to the built-in socket options), `retries'
%% (default 5), `connect_timeout' and `retry_timeout' (both default 5000 ms).
%%
%% Returns `ok', or the error returned by the connect/receive step. A failed
%% send or an unexpected reply crashes the caller, as before, but the socket
%% is now always closed.
-spec join(swim:member(), map()) -> ok | {error, term()}.
join(Member, Opts) ->
    LocalMember = swim_state:local_member(),
    Transport = maps:get(transport, Opts, tcp),
    TransportOpts = [binary, {packet, 4}, {active, false}, {nodelay, true}
                     | maps:get(transport_opts, Opts, [])],
    Retries = maps:get(retries, Opts, 5),
    case connect(Member, Transport, TransportOpts, Opts, Retries) of
        {ok, Socket} ->
            %% try/after guarantees the socket is released on every path,
            %% including receive errors and crashes while decoding.
            try
                exchange_state(Socket, LocalMember)
            after
                swim_socket:close(Socket)
            end;
        Error ->
            Error
    end.

%% Sends the local state over an established connection and merges the reply.
exchange_state(Socket, LocalMember) ->
    Msg = {push_pull, LocalMember, [{membership, {alive, 0, LocalMember}}]},
    ok = swim_socket:send(Socket, encode(Msg)),
    case swim_socket:recv(Socket, 0, 5000) of
        {ok, Data} ->
            {push_pull, _RemoteMember, RemoteState} = decode(Data),
            merge_state(RemoteState);
        Error ->
            Error
    end.

%% Attempts to connect once and falls back to retry_connect/5 on failure.
connect({Ip, Port} = Member, Transport, TransportOpts, Opts, Retries) ->
    ConnectTimeout = maps:get(connect_timeout, Opts, 5000),
    case swim_socket:connect(Transport, Ip, Port, TransportOpts, ConnectTimeout) of
        {ok, Socket} ->
            {ok, Socket};
        {error, _Reason} ->
            retry_connect(Member, Transport, TransportOpts, Opts, Retries)
    end.

%% Waits `retry_timeout' ms and tries again until the retry budget is spent.
retry_connect(_Member, _Transport, _TransportOpts, _Opts, 0) ->
    {error, retry_limit_exceeded};
retry_connect(Member, Transport, TransportOpts, Opts, Retries) ->
    RetryTimeout = maps:get(retry_timeout, Opts, 5000),
    %% Plain timed wait: no timer message is posted to the caller's mailbox
    %% and a stray `retry' message cannot cut the delay short.
    receive after RetryTimeout -> ok end,
    connect(Member, Transport, TransportOpts, Opts, Retries - 1).

%% @doc Starts the push-pull listener on `IpAddr':`Port'.
-spec start_link(inet:ip_address(), inet:port_number(), map()) ->
                        {ok, pid()} | ignore | {error, term()}.
start_link(IpAddr, Port, Opts) ->
    gen_server:start_link(?MODULE, [IpAddr, Port, Opts], []).

%% @private
init([IpAddr, Port, Opts]) ->
    %% Acceptors are linked to this process. Exits must be trapped so that
    %% their termination arrives as {'EXIT', Pid, Reason} in handle_info/2;
    %% otherwise those clauses never run and the acceptor table is never
    %% pruned.
    process_flag(trap_exit, true),
    MinAcceptors = maps:get(min_acceptors, Opts, 2),
    TcpOpts = [binary, {packet, 4}, {ip, IpAddr},
               {reuseaddr, true}, {nodelay, true},
               {active, false}],
    {ok, Socket} = swim_socket:listen(tcp, Port, TcpOpts),
    Acceptors = ets:new(acceptors, [private, set]),
    State = #state{local_member = {IpAddr, Port}, socket = Socket,
                   acceptors = Acceptors, opts = Opts},
    _ = [start_add_acceptor(State) || _ <- lists:seq(1, MinAcceptors)],
    {ok, State}.

%% @private
%% No synchronous requests are supported; reply immediately instead of
%% leaving the caller blocked until its call times out.
handle_call(_Req, _From, State) ->
    {reply, {error, unknown_call}, State}.

%% @private
%% An acceptor reports `accepted' once it has taken a connection; a fresh
%% acceptor is started to replace it in the pool.
handle_cast(accepted, State) ->
    ok = start_add_acceptor(State),
    {noreply, State};
handle_cast(_Req, State) ->
    {noreply, State}.

%% @private
%% Running out of file descriptors is fatal for the listener; any other
%% acceptor exit just removes the acceptor from the table.
handle_info({'EXIT', _Pid, {error, emfile}}, State) ->
    {stop, emfile, State};
handle_info({'EXIT', Pid, _Reason}, State) ->
    ok = remove_acceptor(State, Pid),
    {noreply, State};
handle_info(_Info, State) ->
    {noreply, State}.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% @private
terminate(_Reason, _State) ->
    ok.

%% Spawns a linked acceptor process and records it in the acceptor table.
start_add_acceptor(State) ->
    Args = [self(), State#state.local_member, State#state.socket, State#state.opts],
    Pid = spawn_link(?MODULE, accept, Args),
    ets:insert(State#state.acceptors, {Pid}),
    ok.

%% Forgets an acceptor that has exited.
remove_acceptor(State, Pid) ->
    ets:delete(State#state.acceptors, Pid),
    ok.

%% @doc Acceptor loop, run in its own process. Waits for one connection
%% (retrying on timeouts, aborted connections and TLS alerts), serves a single
%% push-pull request on it and then returns. Returns `ok' when the listen
%% socket is closed; exits with `{error, Reason}' on any other accept error.
-spec accept(pid(), swim:member(), term(), map()) -> ok.
accept(Server, LocalMember, ListenSocket, Opts) ->
    AcceptTimeout = maps:get(accept_timeout, Opts, 10000),
    case swim_socket:accept(ListenSocket, Server, AcceptTimeout) of
        {ok, Socket} ->
            read_message(LocalMember, Socket, Opts),
            swim_socket:close(Socket),
            ok;
        {error, timeout} ->
            accept(Server, LocalMember, ListenSocket, Opts);
        {error, econnaborted} ->
            accept(Server, LocalMember, ListenSocket, Opts);
        {error, {tls_alert, _}} ->
            accept(Server, LocalMember, ListenSocket, Opts);
        {error, closed} ->
            ok;
        {error, Reason} ->
            exit({error, Reason})
    end.

%% Reads one length-prefixed message from the connection and dispatches it.
read_message(LocalMember, Socket, Opts) ->
    case swim_socket:recv(Socket, 0, maps:get(receive_timeout, Opts, 60000)) of
        {ok, Data} ->
            handle_message(decode(Data), LocalMember, Socket);
        {error, Reason} ->
            {error, Reason}
    end.

%% Answers a push-pull request with the local state, then merges the remote
%% state in a separate process. Any other message is ignored.
handle_message({push_pull, RemoteMember, RemoteState}, LocalMember, Socket) ->
    LocalState = swim_state:local_state(),
    send_message({push_pull, LocalMember, LocalState}, Socket),
    swim_metrics:notify({push_pull, RemoteMember}),
    spawn_link(fun() -> merge_state(RemoteState) end),
    ok;
handle_message(_Other, _LocalMember, _Socket) ->
    ok.

send_message(Message, Socket) ->
    EncodedMessage = encode(Message),
    swim_socket:send(Socket, EncodedMessage).

%% NOTE(security): the data comes straight from the network and
%% binary_to_term/1 can create new atoms and funs from it. Consider
%% binary_to_term(Data, [safe]) once it is confirmed that every atom carried
%% in a push-pull payload already exists on the receiving node.
decode(Data) ->
    binary_to_term(Data).

encode(Data) ->
    term_to_binary(Data).

%% Hands every remote event to the local state server.
merge_state(RemoteState) ->
    lists:foreach(fun swim_state:handle_event/1, RemoteState).
159 |
--------------------------------------------------------------------------------
/src/swim_pushpull_sup.erl:
--------------------------------------------------------------------------------
-module(swim_pushpull_sup).
-behavior(supervisor).

-export([start_link/2]).
-export([init/1]).

%% @doc Starts the supervisor, registered locally as `swim_pushpull_sup',
%% for a push-pull listener bound to `IpAddr' and `Port'.
start_link(IpAddr, Port) ->
    InitArgs = [IpAddr, Port],
    supervisor:start_link({local, ?MODULE}, ?MODULE, InitArgs).

%% @private
%% Supervises a single `swim_pushpull' listener started with empty options.
init([IpAddr, Port]) ->
    SupFlags = #{strategy => one_for_one, intensity => 10, period => 10},
    ListenerArgs = [IpAddr, Port, #{}],
    Listener = #{id => pushpull,
                 start => {swim_pushpull, start_link, ListenerArgs}},
    {ok, {SupFlags, [Listener]}}.
18 |
--------------------------------------------------------------------------------
/src/swim_socket.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
-module(swim_socket).

%% UDP
-export([open/2]).
-export([send/4]).

%% TCP / TLS
-export([connect/5]).
-export([listen/3]).
-export([accept/3]).
-export([recv/3]).
-export([send/2]).
-export([peername/1]).

%% Any transport
-export([close/1]).
-export([setopts/2]).

%% Stream sockets are wrapped in a tagged tuple naming their transport; UDP
%% sockets are passed around bare.
-type stream_socket() :: {tcp, inet:socket()} | {ssl, ssl:sslsocket()}.

%% @doc Opens a UDP socket on `Port'.
open(Port, Opts) ->
    gen_udp:open(Port, Opts).

%% @doc Connects to `IpAddr':`Port' over `tcp' or `ssl' and returns the socket
%% tagged with its transport.
-spec connect(tcp | ssl, inet:ip_address() | inet:hostname(),
              inet:port_number(), list(), timeout()) ->
                     {ok, stream_socket()} | {error, term()}.
connect(tcp, IpAddr, Port, Opts, Timeout) ->
    tag(tcp, gen_tcp:connect(IpAddr, Port, Opts, Timeout));
connect(ssl, IpAddr, Port, Opts, Timeout) ->
    tag(ssl, ssl:connect(IpAddr, Port, Opts, Timeout)).

%% @doc Closes a tagged stream socket; anything else is closed as a UDP socket.
close({tcp, Socket}) ->
    gen_tcp:close(Socket);
close({ssl, Socket}) ->
    ssl:close(Socket);
close(Socket) ->
    gen_udp:close(Socket).

%% @doc Sends a UDP datagram to `DestIp':`DestPort'.
send(Socket, DestIp, DestPort, Payload) ->
    gen_udp:send(Socket, DestIp, DestPort, Payload).

%% @doc Sets socket options; untagged sockets are handled through `inet'.
setopts({tcp, Socket}, Opts) ->
    inet:setopts(Socket, Opts);
setopts({ssl, Socket}, Opts) ->
    ssl:setopts(Socket, Opts);
setopts(Socket, Opts) ->
    inet:setopts(Socket, Opts).

%% @doc Opens a listening socket on `Port' for the given transport.
-spec listen(tcp | ssl, inet:port_number(), list()) ->
                    {ok, stream_socket()} | {error, term()}.
listen(tcp, Port, Opts) ->
    tag(tcp, gen_tcp:listen(Port, Opts));
listen(ssl, Port, Opts) ->
    tag(ssl, ssl:listen(Port, Opts)).

%% @doc Accepts one connection on a listen socket. As soon as a connection has
%% been taken off the listen queue, `accepted' is cast to `Pid' (before the
%% TLS handshake, for `ssl'). A TLS handshake that fails because the peer
%% closed the connection is reported as `{error, econnaborted}'.
-spec accept(stream_socket(), pid(), timeout()) ->
                    {ok, stream_socket()} | {error, term()}.
accept({tcp, ListenSocket}, Pid, Timeout) ->
    case gen_tcp:accept(ListenSocket, Timeout) of
        {ok, Socket} ->
            gen_server:cast(Pid, accepted),
            {ok, {tcp, Socket}};
        {error, Reason} ->
            {error, Reason}
    end;
accept({ssl, ListenSocket}, Pid, Timeout) ->
    case ssl:transport_accept(ListenSocket, Timeout) of
        {ok, Socket} ->
            gen_server:cast(Pid, accepted),
            handshake(Socket, Timeout);
        {error, Reason} ->
            {error, Reason}
    end.

%% Completes the server side of the TLS handshake. ssl:handshake/2 returns
%% `{ok, SslSocket}' on success (never a bare `ok'), and the socket it returns
%% is the one that must be used from then on.
handshake(Socket, Timeout) ->
    case ssl:handshake(Socket, Timeout) of
        {ok, SslSocket} ->
            {ok, {ssl, SslSocket}};
        {error, closed} ->
            {error, econnaborted};
        {error, Reason} ->
            {error, Reason}
    end.

%% @doc Receives from a stream socket.
recv({tcp, Socket}, Size, Timeout) ->
    gen_tcp:recv(Socket, Size, Timeout);
recv({ssl, Socket}, Size, Timeout) ->
    ssl:recv(Socket, Size, Timeout).

%% @doc Sends `Data' on a stream socket.
send({tcp, Socket}, Data) ->
    gen_tcp:send(Socket, Data);
send({ssl, Socket}, Data) ->
    ssl:send(Socket, Data).

%% @doc Returns the address and port of the remote end of a stream socket.
peername({tcp, Socket}) ->
    inet:peername(Socket);
peername({ssl, Socket}) ->
    ssl:peername(Socket).

%% Wraps a successfully opened socket in its transport tag and passes errors
%% through untouched.
tag(Transport, {ok, Socket}) ->
    {ok, {Transport, Socket}};
tag(_Transport, {error, Reason}) ->
    {error, Reason}.
122 |
--------------------------------------------------------------------------------
/src/swim_state.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
-module(swim_state).
-behavior(gen_server).

%% API
-export([start_link/4]).
-export([local_member/0]).
-export([local_state/0]).
-export([members/0]).
-export([proxies/1]).
-export([broadcasts/1]).
-export([ack/1]).
-export([probe_timeout/2]).
-export([handle_event/1]).
-export([publish/1]).

%% gen_server callbacks
-export([init/1]).
-export([handle_call/3]).
-export([handle_cast/2]).
-export([handle_info/2]).
-export([code_change/3]).
-export([terminate/2]).

-record(state, {
          protocol_period :: pos_integer(),
          ack_timeout     :: pos_integer(),
          probe_timeout   :: pos_integer(),
          num_proxies     :: pos_integer(),
          %% Member (and incarnation) probed in the current protocol period,
          %% or `undefined' when no probe is outstanding.
          current_probe   :: undefined | {swim:member(), swim:incarnation()},
          membership      :: swim_membership:membership(),
          broadcasts      :: swim_broadcasts:broadcasts(),
          awareness       :: swim_awareness:awareness()
         }).

%% @doc Starts the state server, registered locally as `swim_state'.
%% `Opts' must contain `ack_timeout', `probe_timeout', `protocol_period' and
%% `num_proxies'.
-spec start_link(swim_membership:membership(), swim_broadcasts:broadcasts(),
                 swim_awareness:awareness(), map()) ->
                        {ok, pid()} | ignore | {error, term()}.
start_link(Membership, Broadcasts, Awareness, Opts) ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [Membership, Broadcasts, Awareness, Opts], []).

%% @doc Returns `swim_membership:local_member/1' of the current membership.
local_member() ->
    gen_server:call(?MODULE, local_member).

%% @doc Returns `swim_membership:local_state/1' of the current membership.
local_state() ->
    gen_server:call(?MODULE, local_state).

%% @doc Returns `swim_membership:members/1' of the current membership.
members() ->
    gen_server:call(?MODULE, members).

%% @doc Returns the proxies selected by `swim_membership:proxies/3' for
%% `Target', using the configured `num_proxies'.
proxies(Target) ->
    gen_server:call(?MODULE, {proxies, Target}).

%% @doc Takes the pending broadcast events for `Target' and prunes the
%% broadcast queue against the current retransmit limit.
broadcasts(Target) ->
    gen_server:call(?MODULE, {broadcasts, Target}).

%% @doc Queues a user event for broadcast.
-spec publish(term()) -> ok.
publish(Event) ->
    gen_server:cast(?MODULE, {publish, Event}).

%% @doc Reports that `Member' acknowledged a probe.
-spec ack(swim:member()) -> ok.
ack(Member) ->
    gen_server:cast(?MODULE, {ack, Member}).

%% @doc Reports that the probe of `Member' timed out after `MissedNacks'
%% missed nacks.
-spec probe_timeout(swim:member(), non_neg_integer()) -> ok.
probe_timeout(Member, MissedNacks) ->
    gen_server:cast(?MODULE, {probe_timeout, Member, MissedNacks}).

%% @doc Feeds an event received from another member into the local state.
-spec handle_event(term()) -> ok.
handle_event(Event) ->
    gen_server:cast(?MODULE, {broadcast_event, Event}).

%% @private
init([Membership, Broadcasts, Awareness, Opts]) ->
    State =
        #state{
           membership = Membership,
           broadcasts = Broadcasts,
           awareness = Awareness,
           ack_timeout = maps:get(ack_timeout, Opts),
           probe_timeout = maps:get(probe_timeout, Opts),
           protocol_period = maps:get(protocol_period, Opts),
           num_proxies = maps:get(num_proxies, Opts)
          },
    %% Kick off the first protocol period as soon as init returns.
    self() ! protocol_period,
    {ok, State}.

%% @private
handle_call(local_member, _From, State) ->
    #state{membership = Membership} = State,
    {reply, swim_membership:local_member(Membership), State};
handle_call(members, _From, State) ->
    {reply, swim_membership:members(State#state.membership), State};
handle_call(local_state, _From, State) ->
    {reply, swim_membership:local_state(State#state.membership), State};
handle_call({proxies, Target}, _From, State) ->
    Proxies = swim_membership:proxies(State#state.num_proxies, Target, State#state.membership),
    {reply, Proxies, State};
handle_call({broadcasts, Target}, _From, State) ->
    #state{membership = Membership, broadcasts = Broadcasts0} = State,
    {Events, Broadcasts1} = swim_broadcasts:take(Target, Broadcasts0),
    NumMembers = swim_membership:size(Membership),
    Retransmits = swim_broadcasts:retransmit_limit(NumMembers, Broadcasts1),
    Broadcasts2 = swim_broadcasts:prune(Retransmits, Broadcasts1),
    {reply, Events, State#state{broadcasts = Broadcasts2}};
handle_call(_Msg, _From, State) ->
    %% Reply right away; returning `noreply' here would leave the caller
    %% blocked until its gen_server:call/2 times out.
    {reply, {error, unknown_call}, State}.

%% @private
%% An ack only counts when it comes from the member currently being probed;
%% any other ack falls through to the catch-all clause and is dropped.
handle_cast({ack, Member}, #state{current_probe = {Member, Incarnation}} = State) ->
    {noreply, handle_ack(Member, Incarnation, State)};
handle_cast({probe_timeout, Member, MissedNacks}, State) ->
    {noreply, handle_probe_timeout(Member, MissedNacks, State)};
handle_cast({publish, Event}, State) ->
    Broadcasts = swim_broadcasts:insert({user, Event}, State#state.broadcasts),
    {noreply, State#state{broadcasts = Broadcasts}};
handle_cast({broadcast_event, Event}, State) ->
    {Events, Membership} = swim_membership:handle_event(Event, State#state.membership),
    %% Record an awareness failure when swim_membership:refuted/2 reports
    %% that the resulting events include a refutation.
    Awareness =
        case swim_membership:refuted(Events, Membership) of
            true -> swim_awareness:failure(State#state.awareness);
            false -> State#state.awareness
        end,
    Broadcasts = swim_broadcasts:insert(Events, State#state.broadcasts),
    ok = swim_subscriptions:publish(Events),
    {noreply, State#state{membership = Membership, broadcasts = Broadcasts, awareness = Awareness}};
handle_cast(_Msg, State) ->
    {noreply, State}.

%% @private
handle_info(protocol_period, State) ->
    NewState = handle_protocol_period(State),
    schedule_next_protocol_period(NewState),
    {noreply, NewState};
handle_info({suspicion_timeout, Member, SuspectedAt}, State) ->
    {Events, Membership} =
        swim_membership:faulty(Member, SuspectedAt, local, State#state.membership),
    Broadcasts = swim_broadcasts:insert(Events, State#state.broadcasts),
    ok = swim_subscriptions:publish(Events),
    {noreply, State#state{membership = Membership, broadcasts = Broadcasts}};
handle_info(_Info, State) ->
    {noreply, State}.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% @private
terminate(_Reason, _State) ->
    ok.

%% Marks the probed member as suspect and records the failed probe in the
%% awareness score. Timeouts for anything but the current probe are ignored.
handle_probe_timeout(Member, MissedNacks, #state{current_probe = {Member, Incarnation}} = State) ->
    #state{membership = Membership0, broadcasts = Broadcasts0, awareness = Awareness0} = State,
    {Events, Membership} = swim_membership:suspect(Member, Incarnation, local, Membership0),
    Broadcasts = swim_broadcasts:insert(Events, Broadcasts0),
    ok = swim_subscriptions:publish(Events),
    Awareness = swim_awareness:failure(MissedNacks + 1, Awareness0),
    State#state{
      current_probe = undefined,
      membership = Membership,
      broadcasts = Broadcasts,
      awareness = Awareness
     };
handle_probe_timeout(_Member, _MissedNacks, State) ->
    State.

%% Marks the acknowledged member as alive, records the successful probe in the
%% awareness score and clears the outstanding probe.
handle_ack(Member, Incarnation, State) ->
    #state{membership = Membership0, broadcasts = Broadcasts0, awareness = Awareness0} = State,
    {Events, Membership} = swim_membership:alive(Member, Incarnation, Membership0),
    Broadcasts = swim_broadcasts:insert(Events, Broadcasts0),
    ok = swim_subscriptions:publish(Events),
    Awareness = swim_awareness:success(Awareness0),
    State#state{
      membership = Membership,
      broadcasts = Broadcasts,
      awareness = Awareness,
      current_probe = undefined
     }.

%% Picks the next probe target, if any, and asks swim_failure to probe it with
%% a probe timeout scaled by the current awareness.
handle_protocol_period(State) ->
    case swim_membership:probe_target(State#state.membership) of
        none ->
            State;
        {{Target, _} = Probe, Membership} ->
            ProbeTimeout = swim_awareness:scale(State#state.probe_timeout, State#state.awareness),
            ok = swim_failure:probe(Target, State#state.ack_timeout, ProbeTimeout),
            State#state{current_probe = Probe, membership = Membership}
    end.

%% Schedules the next `protocol_period' message; the period is scaled by the
%% current awareness.
schedule_next_protocol_period(State) ->
    #state{awareness = Awareness, protocol_period = ProtocolPeriod} = State,
    Timeout = swim_awareness:scale(ProtocolPeriod, Awareness),
    swim_time:send_after(Timeout, self(), protocol_period).
204 |
--------------------------------------------------------------------------------
/src/swim_subscriptions.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
-module(swim_subscriptions).
-behavior(gen_event).

%% API
-export([start_link/0]).

-export([subscribe/2]).
-export([unsubscribe/2]).
-export([publish/1]).

%% gen_event callbacks
-export([init/1]).
-export([handle_event/2]).
-export([handle_call/2]).
-export([handle_info/2]).
-export([terminate/2]).
-export([code_change/3]).

-record(state, {
          pid            :: pid(),
          event_category :: user | membership,
          mref           :: reference()
         }).

%% @doc Starts the subscription event manager, registered locally as
%% `swim_subscriptions'.
-spec start_link() -> {ok, pid()} | {error, term()}.
start_link() ->
    gen_event:start_link({local, ?MODULE}).

%% @doc Subscribes `Pid' to events of `EventCategory'. Matching events are
%% delivered to `Pid' as `{swim, {EventCategory, Event}}'. The handler is
%% installed under an id unique to the `{EventCategory, Pid}' pair so that it
%% can be removed individually.
-spec subscribe(user | membership, pid()) -> term().
subscribe(EventCategory, Pid) ->
    gen_event:add_handler(?MODULE, handler_id(EventCategory, Pid), [EventCategory, Pid]).

%% @doc Removes the subscription of `Pid' to `EventCategory'. Returns
%% `{error, module_not_found}' when no such subscription exists.
-spec unsubscribe(user | membership, pid()) -> term().
unsubscribe(EventCategory, Pid) ->
    gen_event:delete_handler(?MODULE, handler_id(EventCategory, Pid), [EventCategory, Pid]).

%% @doc Publishes one event or a list of events to the subscribers.
%% Membership `alive' and `faulty' events are forwarded with their second
%% element dropped; user events are forwarded unchanged; every other event is
%% ignored.
-spec publish(term()) -> ok.
publish(Events) when is_list(Events) ->
    lists:foreach(fun publish/1, Events);
publish({membership, {alive, _, M}}) ->
    gen_event:notify(?MODULE, {membership, {alive, M}});
publish({membership, {faulty, _, M, T}}) ->
    gen_event:notify(?MODULE, {membership, {faulty, M, T}});
publish({user, Event}) ->
    gen_event:notify(?MODULE, {user, Event});
publish(_) ->
    ok.

%% @private
%% Monitors the subscriber; the monitor is owned by the event manager process,
%% so the matching 'DOWN' message is delivered to handle_info/2.
init([EventCategory, Pid]) ->
    MRef = erlang:monitor(process, Pid),
    {ok, #state{pid = Pid, event_category = EventCategory, mref = MRef}}.

%% @private
%% Only events of the subscribed category are forwarded.
handle_event({EventCategory, _Event} = Data, State)
  when State#state.event_category =:= EventCategory ->
    State#state.pid ! {swim, Data},
    {ok, State};
handle_event(_Event, State) ->
    {ok, State}.

%% @private
handle_call(_Msg, State) ->
    {ok, ok, State}.

%% @private
%% The handler removes itself once its own subscriber has gone down.
handle_info({'DOWN', MRef, process, Pid, _Reason}, State)
  when State#state.mref =:= MRef andalso State#state.pid =:= Pid ->
    remove_handler;
handle_info(_Info, State) ->
    {ok, State}.

%% @private
%% Releases the monitor so that an explicit unsubscribe does not leave a stale
%% monitor (and a later 'DOWN' message) behind in the event manager.
terminate(_Reason, #state{mref = MRef}) ->
    true = erlang:demonitor(MRef, [flush]),
    ok.

%% @private
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%% Handler id used with gen_event; unique per category and subscriber.
handler_id(EventCategory, Pid) ->
    {?MODULE, {EventCategory, Pid}}.
89 |
--------------------------------------------------------------------------------
/src/swim_sup.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
-module(swim_sup).
-behavior(supervisor).

-export([start_link/0]).
-export([init/1]).

%% @doc Starts the top-level supervisor, registered locally as `swim_sup'.
-spec start_link() -> {ok, pid()} | ignore | {error, term()}.
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% @private
%% Reads the `swim' application environment (falling back to the defaults
%% below), builds the initial membership, broadcast and awareness structures
%% and returns the child specifications.
init([]) ->
    ListenIP = application:get_env(swim, ip, {127,0,0,1}),
    ListenPort = application:get_env(swim, port, 5000),
    AckTimeout = application:get_env(swim, ack_timeout, 100),
    %% The nack timeout defaults to 80% of the ack timeout. trunc/1 equals
    %% floor/1 for non-negative values and, unlike the floor/1 BIF, is
    %% available on every OTP release listed in .travis.yml.
    NackTimeout = application:get_env(swim, nack_timeout, trunc(AckTimeout * 0.8)),
    ProbeTimeout = application:get_env(swim, probe_timeout, 500),
    ProtocolPeriod = application:get_env(swim, protocol_period, 1000),
    NumProxies = application:get_env(swim, num_proxies, 3),
    SuspicionFactor = application:get_env(swim, suspicion_factor, 3),
    AwarenessCount = application:get_env(swim, awareness_count, 8),
    Alpha = application:get_env(swim, alpha, 5),
    Beta = application:get_env(swim, beta, 6),
    Retransmits = application:get_env(swim, retransmit_factor, 3),
    MaxMessageSize = application:get_env(swim, max_message_size, 452),
    LocalMember = {ListenIP, ListenPort},
    Membership = swim_membership:new(LocalMember, Alpha, Beta, ProbeTimeout, SuspicionFactor),
    Broadcasts = swim_broadcasts:new(Retransmits, MaxMessageSize),
    Awareness = swim_awareness:new(AwarenessCount),
    StateOpts = #{
      protocol_period => ProtocolPeriod,
      probe_timeout => ProbeTimeout,
      ack_timeout => AckTimeout,
      num_proxies => NumProxies
     },
    State = #{id => state,
              start => {swim_state, start_link, [Membership, Broadcasts, Awareness, StateOpts]}},
    Keyring = swim_keyring:new(get_key()),
    Failure = #{id => failure,
                start => {swim_failure, start_link,
                          [LocalMember, Keyring, AckTimeout, NackTimeout]}},
    %% swim_pushpull_sup is itself a supervisor; declaring it as such gives it
    %% the default `infinity' shutdown so it can stop its own children.
    PushPull = #{id => pushpull,
                 type => supervisor,
                 start => {swim_pushpull_sup, start_link, [ListenIP, ListenPort]}},
    %% gen_event managers have a dynamic set of callback modules.
    Metrics = #{id => metrics,
                modules => dynamic,
                start => {swim_metrics, start_link, []}},
    Subscriptions = #{id => subscriptions,
                      modules => dynamic,
                      start => {swim_subscriptions, start_link, []}},
    Flags = #{strategy => rest_for_one,
              intensity => 5,
              period => 900
             },
    {ok, {Flags, [State, Failure, PushPull, Subscriptions, Metrics]}}.

%% Loads a base64-encoded key from the configured key file. When no key file
%% is configured either, a random 32-byte key is generated.
%% NOTE(review): a randomly generated key differs on every node, so nodes
%% started without `key' or `keyfile' presumably cannot decrypt each other's
%% messages -- confirm against swim_keyring.
read_key_file({ok, KeyFile}) ->
    {ok, EncodedKey} = file:read_file(KeyFile),
    [base64:decode(EncodedKey)];
read_key_file(undefined) ->
    [crypto:strong_rand_bytes(32)].

%% Returns the list of keys for the keyring: the base64-encoded `key' from the
%% application environment when set, otherwise whatever read_key_file/1 yields
%% for the `keyfile' setting.
get_key() ->
    case application:get_env(swim, key) of
        {ok, Base64Key} ->
            [base64:decode(Base64Key)];
        undefined ->
            read_key_file(application:get_env(swim, keyfile))
    end.
85 |
--------------------------------------------------------------------------------
/src/swim_time.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | %%% @copyright 2015-2017
19 | %%% @version {@version}
20 |
-module(swim_time).

%% Thin indirection over the timer and clock BIFs in `erlang'.
-export([send_after/3]).
-export([cancel_timer/1]).
-export([cancel_timer/2]).
-export([monotonic_time/0]).

%% @doc Delegates to `erlang:send_after/3': delivers `Msg' to `Dest' after
%% `Time' milliseconds and returns the timer reference.
-spec send_after(non_neg_integer(), pid() | atom(), term()) -> reference().
send_after(Time, Dest, Msg) ->
    TimerRef = erlang:send_after(Time, Dest, Msg),
    TimerRef.

%% @doc Delegates to `erlang:cancel_timer/1'.
-spec cancel_timer(reference()) -> non_neg_integer() | false.
cancel_timer(TRef) ->
    Remaining = erlang:cancel_timer(TRef),
    Remaining.

%% @doc Delegates to `erlang:cancel_timer/2'.
-spec cancel_timer(reference(), list()) -> non_neg_integer() | false | ok.
cancel_timer(TRef, Options) ->
    Outcome = erlang:cancel_timer(TRef, Options),
    Outcome.

%% @doc Delegates to `erlang:monotonic_time/0' (native time unit).
-spec monotonic_time() -> integer().
monotonic_time() ->
    Now = erlang:monotonic_time(),
    Now.
39 |
--------------------------------------------------------------------------------
/test/property_test/prop_swim_broadcasts.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
-module(prop_swim_broadcasts).
-behavior(gen_server).

-include_lib("proper/include/proper.hrl").

-compile([export_all]).

-import(swim_generators, [swim_event/0]).

%% Model of the broadcast queue: every pending event paired with the number of
%% times it has been taken so far, plus the events dropped by pruning.
-record(state, {
          events = [] :: [{non_neg_integer(), swim:swim_event()}],
          pruned = [] :: [swim:swim_event()]
         }).

%% Generator for the retransmit limit handed to prune/1.
g_retransmits() ->
    range(1, 10).

%% PropEr statem callback: the model starts out empty.
initial_state() ->
    #state{events = [], pruned = []}.

%% PropEr statem callback: insert, take and prune are equally likely.
command(_State) ->
    frequency([
               {1, {call, ?MODULE, insert, [swim_event()]}},
               {1, {call, ?MODULE, take, []}},
               {1, {call, ?MODULE, prune, [g_retransmits()]}}
              ]).

%% PropEr statem callback: take and prune are only generated once the model
%% holds at least one event.
precondition(#state{events = []}, {call, ?MODULE, take, _}) ->
    false;
precondition(#state{events = []}, {call, ?MODULE, prune, _}) ->
    false;
precondition(_State, _Call) ->
    true.

%% PropEr statem callback: model transitions.
next_state(State, _V, {call, ?MODULE, insert, [Event]}) ->
    %% New events start with a transmit count of zero.
    State#state{events = lists:sort(fun sort/2, [{0, Event} | State#state.events])};
next_state(State, _V, {call, ?MODULE, take, []}) ->
    %% The model assumes a single take returns at most 11 events.
    %% NOTE(review): presumably derived from the message size limit used by
    %% swim_broadcasts -- confirm.
    N = min(length(State#state.events), 11),
    {Taken0, Rest} = lists:split(N, lists:sort(fun sort/2, State#state.events)),
    Taken = [{T + 1, E} || {T, E} <- Taken0],
    State#state{events = lists:sort(fun sort/2, Taken ++ Rest)};
next_state(State, _V, {call, ?MODULE, prune, [Retransmit]}) ->
    %% Events taken fewer than `Retransmit' times are kept, the rest pruned.
    Partition = fun({T, _}) -> T < Retransmit end,
    {Keep, Pruned0} = lists:partition(Partition, State#state.events),
    Pruned = lists:foldl(fun({_, E}, Acc) -> [E | Acc] end, State#state.pruned, Pruned0),
    State#state{events = lists:sort(fun sort/2, Keep), pruned = Pruned}.

%% PropEr statem callback: everything returned by take must be an event the
%% model knows about; insert and prune always pass.
postcondition(State, {call, ?MODULE, take, []}, Result) ->
    Events = [E || {_, E} <- State#state.events],
    lists:all(fun(M) -> lists:member(M, Events) end, Result);
postcondition(_State, {call, ?MODULE, insert, [_Event]}, _Result) ->
    true;
postcondition(_State, {call, ?MODULE, prune, [_Retransmit]}, _Result) ->
    true.

%% The property: every generated command sequence runs against a fresh
%% broadcast server without violating a postcondition.
prop_swim_broadcasts() ->
    ?FORALL(Cmds, commands(?MODULE),
            begin
                start_link(),
                {H, S, R} = run_commands(?MODULE, Cmds),
                stop(),
                ?WHENFAIL(
                   print_results(H, S, R),
                   aggregate(command_names(Cmds), R =:= ok))
            end).

print_results(H, S, R) ->
    io:format("History: ~p~nState: ~p~nResult:~p~n", [H, S, R]).

%% Ordering used by the model: membership events sort before user events;
%% within one category entries are ordered by standard term order.
sort({_, {user, _}}, {_, {membership, _}}) -> false;
sort({_, {membership, _}}, {_, {user, _}}) -> true;
sort(A, B) -> A =< B.

%% Commands under test: thin calls into the gen_server defined below, which
%% owns the swim_broadcasts structure.
take() ->
    gen_server:call(?MODULE, take, 500).

insert(Event) ->
    gen_server:call(?MODULE, {insert, Event}, 500).

prune(Retransmits) ->
    gen_server:call(?MODULE, {prune, Retransmits}, 500).

start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

stop() ->
    gen_server:stop(?MODULE).

%% gen_server callbacks: the server state is the swim_broadcasts structure.
init([]) ->
    {ok, swim_broadcasts:new(3)}.

handle_call(take, _From, Broadcasts0) ->
    {Take, Broadcasts} = swim_broadcasts:take(Broadcasts0),
    {reply, Take, Broadcasts};
handle_call({insert, Event}, _From, Broadcasts) ->
    {reply, ok, swim_broadcasts:insert(Event, Broadcasts)};
handle_call({prune, Num}, _From, Broadcasts) ->
    {reply, ok, swim_broadcasts:prune(Num, Broadcasts)}.

handle_cast(_Msg, State) ->
    {noreply, State}.

handle_info(_Info, State) ->
    {noreply, State}.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

terminate(_Reason, _State) ->
    ok.
127 |
--------------------------------------------------------------------------------
/test/property_test/prop_swim_keyring.erl:
--------------------------------------------------------------------------------
1 | -module(prop_swim_keyring).
2 |
3 | -include_lib("proper/include/proper.hrl").
4 |
5 | -export([prop_encryption/0]).
6 |
7 | -import(swim_generators, [swim_message/0]).
8 |
%% Generator for a 32-byte binary key.
g_key() ->
    KeySize = 32,
    binary(KeySize).

%% Generator for a pair of keyrings that are both built from the same key.
g_symmetric_keys() ->
    ?LET(SharedKey, g_key(),
         {swim_keyring:new([SharedKey]), swim_keyring:new([SharedKey])}).

%% Generator for a pair of keyrings built from two keys that differ.
g_asymmetric_keys() ->
    DistinctKeys = ?SUCHTHAT({Left, Right}, {g_key(), g_key()}, Left /= Right),
    ?LET({KeyA, KeyB}, DistinctKeys,
         {swim_keyring:new([KeyA]), swim_keyring:new([KeyB])}).

%% Generator choosing between a symmetric and an asymmetric keyring pair.
g_keypair() ->
    Alternatives = [g_symmetric_keys(), g_asymmetric_keys()],
    oneof(Alternatives).
22 |
%% Generator for a swim message that has been encoded with
%% `swim_messages:encode/1' and flattened into a single binary.
g_encoded_message() ->
    ?LET(Generated, swim_message(),
         begin
             Encoded = swim_messages:encode(Generated),
             iolist_to_binary(Encoded)
         end).
26 |
%% Property: a message encrypted with one keyring decrypts to the original
%% message when the second keyring is identical, and is rejected when it is
%% not. Each test case is labelled `symmetric' or `asymmetric' so the
%% distribution of the two cases shows up in the report.
prop_encryption() ->
    ?FORALL(Keypair, g_keypair(),
            ?FORALL(Message, g_encoded_message(),
                    case is_symmetric(Keypair) of
                        true ->
                            aggregate([symmetric],
                                      assert_encryption(Keypair, Message));
                        false ->
                            %% Label was previously misspelled `asynmetric'.
                            aggregate([asymmetric],
                                      refute_encryption(Keypair, Message))
                    end)).
40 |
%% True when the argument is a pair whose two elements are exactly equal;
%% false for anything else.
is_symmetric({Left, Right}) ->
    Left =:= Right;
is_symmetric(_NotAPair) ->
    false.
45 |
%% Encrypt `Message' with the first keyring and check that decrypting with
%% the second one returns exactly `{ok, Message}'.
assert_encryption({SenderRing, ReceiverRing}, Message) ->
    Ciphertext = swim_keyring:encrypt(Message, SenderRing),
    swim_keyring:decrypt(Ciphertext, ReceiverRing) =:= {ok, Message}.
54 |
%% Encrypt `Message' with the first keyring and check that decrypting with
%% the second one is rejected with `{error, failed_verification}'.
refute_encryption({SenderRing, ReceiverRing}, Message) ->
    Ciphertext = swim_keyring:encrypt(Message, SenderRing),
    swim_keyring:decrypt(Ciphertext, ReceiverRing) =:= {error, failed_verification}.
63 |
--------------------------------------------------------------------------------
/test/property_test/prop_swim_membership.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017 All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | -module(prop_swim_membership).
19 |
20 | -include_lib("proper/include/proper.hrl").
21 |
22 | -behavior(proper_statem).
23 |
24 | -export([prop_membership/0]).
25 |
26 | -export([command/1]).
27 | -export([initial_state/0]).
28 | -export([next_state/3]).
29 | -export([postcondition/3]).
30 | -export([precondition/2]).
31 |
32 | -export([alive/2]).
33 | -export([suspect/2]).
34 | -export([faulty/2]).
35 | -export([members/0]).
36 |
37 | -export([start_link/1]).
38 | -export([init/1]).
39 | -export([handle_call/3]).
40 | -export([handle_cast/2]).
41 | -export([handle_info/2]).
42 | -export([code_change/3]).
43 | -export([terminate/2]).
44 |
45 | -import(swim_generators, [g_member/0, g_incarnation/0]).
46 |
47 | -record(state, {
48 | me :: swim:member(),
49 | incarnation = 0 :: swim:incarnation(),
50 | members = [] :: [{swim:member(), alive | suspect | faulty, swim:incarnation()}]
51 | }).
52 |
%% Generator that always yields the model's local member paired with the
%% model's current local incarnation.
g_local_member(#state{me = Me, incarnation = Incarnation}) ->
    {exactly(Me), exactly(Incarnation)}.

%% Generator that picks one member known to the model and pairs it with its
%% recorded incarnation shifted by -1, 0 or +1. Requires a non-empty member
%% list.
g_non_local_member(State) ->
    Known = State#state.members,
    ?LET(Delta, range(-1, 1),
         ?LET({Member, _Status, Incarnation}, oneof(Known),
              {Member, Incarnation + Delta})).

%% Generator yielding either the local member or a known non-local member,
%% in both cases as a `{Member, Incarnation}' pair.
g_existing_member(State) ->
    Local = g_local_member(State),
    Remote = g_non_local_member(State),
    oneof([Local, Remote]).
64 |
%% Generator that prefers a member the model currently holds as `suspect'
%% and yields it as `{Member, Incarnation}'. The filter is a
%% ?SUCHTHATMAYBE, so a non-suspect member may still be produced.
g_existing_suspected_member(State) ->
    Candidate = ?SUCHTHATMAYBE({_Member, Status, _Incarnation},
                               oneof(State#state.members),
                               Status =:= suspect),
    ?LET({Member, _Status, Incarnation}, Candidate,
         {Member, Incarnation}).
71 |
%% Generator for the `{Member, Incarnation}' argument of suspect/faulty
%% commands. Existing members are only offered once the model knows any.
g_suspected_member(State) ->
    Fresh = [{1, {g_member(State), g_incarnation()}}],
    Existing =
        case State#state.members of
            [] ->
                [];
            _ ->
                [{1, g_existing_member(State)},
                 {2, g_existing_suspected_member(State)}]
        end,
    frequency(Fresh ++ Existing).

%% Generator for the member raising a suspicion: a generated member or the
%% atom `local'.
g_suspecting_member(State) ->
    Choices = [{5, g_member(State)}, {2, local}],
    frequency(Choices).

%% Generator mixing freshly generated members with existing ones.
%% NOTE(review): g_existing_member/1 yields `{Member, Incarnation}' pairs
%% while g_member/0 yields bare members, so this generator produces two
%% different shapes -- confirm that is intended.
g_member(State) ->
    Existing =
        case State#state.members of
            [] -> [];
            _ -> [{3, g_existing_member(State)}]
        end,
    frequency([{1, g_member()} | Existing]).
83 |
%% proper_statem callback: the model starts with the local member at
%% 127.0.0.1:5000 and the record defaults for everything else.
initial_state() ->
    LocalMember = {{127,0,0,1},5000},
    #state{me = LocalMember}.

%% proper_statem callback: pick one of the four symbolic calls. `suspect'
%% and `faulty' take the same `[{Member, Incarnation}, From]' argument shape.
command(State) ->
    Suspicion = [g_suspected_member(State), g_suspecting_member(State)],
    Alive = {call, ?MODULE, alive, [g_member(State), g_incarnation()]},
    Suspect = {call, ?MODULE, suspect, Suspicion},
    Faulty = {call, ?MODULE, faulty, Suspicion},
    Members = {call, ?MODULE, members, []},
    oneof([Alive, Suspect, Faulty, Members]).
96 |
%% proper_statem callback deciding whether a symbolic call may be issued in
%% the given model state.
%%
%% `suspect' and `faulty' are rejected while the model knows no members, and
%% when the suspected member and the member raising the suspicion are the
%% same term. Every other call is allowed.
precondition(#state{members = []}, {call, ?MODULE, Kind, _Args})
  when Kind =:= suspect; Kind =:= faulty ->
    false;
%% command/1 builds the argument list as [{Member, Incarnation}, From]. The
%% previous clauses matched a three-element list [Member, Inc, Member] and
%% therefore could never fire.
precondition(_State, {call, ?MODULE, Kind, [{Member, _Inc}, Member]})
  when Kind =:= suspect; Kind =:= faulty ->
    false;
precondition(_State, _Call) ->
    true.
107 |
%% proper_statem callback. For `members' every entry of the model must be
%% present in the reported list (the reported list may contain more). The
%% mutating calls only need to have replied `ok'.
postcondition(#state{members = Expected}, {call, ?MODULE, members, []}, Reported) ->
    ordsets:is_subset(ordsets:from_list(Expected),
                      ordsets:from_list(Reported));
postcondition(_State, {call, ?MODULE, alive, [_Member, _Inc]}, ok) ->
    true;
postcondition(_State, {call, ?MODULE, Kind, [{_Member, _Inc}, _From]}, ok)
  when Kind =:= suspect; Kind =:= faulty ->
    true.
117 |
%% proper_statem callback: advance the model for a symbolic call.
%%
%% * `members' never changes the model.
%% * Calls that target the local member only affect the local incarnation:
%%   `alive' with a strictly greater incarnation, or `suspect'/`faulty' with
%%   a greater-or-equal one, set it to that incarnation plus one.
%% * `alive' adds an unknown member, or replaces a known one when the
%%   incarnation is strictly greater.
%% * `suspect' marks a known member as suspect when the incarnation is
%%   greater or equal to the recorded one.
%% * `faulty' removes a known member only if it is currently suspect and the
%%   incarnation is greater or equal to the recorded one.
next_state(State, _V, {call, ?MODULE, members, []}) ->
    State;
next_state(#state{me = Me} = State, _V,
           {call, ?MODULE, alive, [Me, Incarnation]}) ->
    bump_local_when(Incarnation > State#state.incarnation, Incarnation, State);
next_state(State, _V, {call, ?MODULE, alive, [Member, Incarnation]}) ->
    Known = State#state.members,
    case lists:keytake(Member, 1, Known) of
        false ->
            State#state{members = [{Member, alive, Incarnation} | Known]};
        {value, {Member, _Status, Current}, Others}
          when Incarnation > Current ->
            State#state{members = [{Member, alive, Incarnation} | Others]};
        _ ->
            State
    end;
next_state(#state{me = Me} = State, _V,
           {call, ?MODULE, Kind, [{Me, Incarnation}, _From]})
  when Kind =:= suspect; Kind =:= faulty ->
    bump_local_when(Incarnation >= State#state.incarnation, Incarnation, State);
next_state(State, _V, {call, ?MODULE, suspect, [{Member, Incarnation}, _From]}) ->
    case lists:keytake(Member, 1, State#state.members) of
        {value, {Member, _Status, Current}, Others}
          when Incarnation >= Current ->
            State#state{members = [{Member, suspect, Incarnation} | Others]};
        _ ->
            State
    end;
next_state(State, _V, {call, ?MODULE, faulty, [{Member, Incarnation}, _From]}) ->
    case lists:keytake(Member, 1, State#state.members) of
        {value, {Member, suspect, Current}, Others}
          when Incarnation >= Current ->
            State#state{members = Others};
        _ ->
            State
    end.

%% Set the local incarnation to `Incarnation + 1' when the condition holds;
%% otherwise leave the model untouched.
bump_local_when(true, Incarnation, State) ->
    State#state{incarnation = Incarnation + 1};
bump_local_when(false, _Incarnation, State) ->
    State.
184 |
%% Stateful property for the membership test server: start the server for
%% the local member 127.0.0.1:5000, run the generated commands, stop the
%% server and require the run to have finished with `ok'. History, state and
%% result are printed when the property fails.
prop_membership() ->
    ?FORALL(Cmds, commands(?MODULE),
            begin
                LocalMember = {{127,0,0,1}, 5000},
                {ok, _} = start_link(LocalMember),
                {History, State, Result} = run_commands(?MODULE, Cmds),
                stop(),
                ?WHENFAIL(
                   io:format("History: ~p~nState: ~p~nResult: ~p~n",
                             [History, State, Result]),
                   aggregate(command_names(Cmds), Result =:= ok))
            end).
195 |
%% Client API: forward each operation to the server registered under this
%% module's name and return its reply.

%% Report `Member' as alive at `Incarnation'.
alive(Member, Incarnation) ->
    Request = {alive, Member, Incarnation},
    gen_server:call(?MODULE, Request).

%% Report that `From' suspects `Member' at `Incarnation'.
suspect({Member, Incarnation}, From) ->
    Request = {suspect, Member, Incarnation, From},
    gen_server:call(?MODULE, Request).

%% Report that `From' declares `Member' faulty at `Incarnation'.
faulty({Member, Incarnation}, From) ->
    Request = {faulty, Member, Incarnation, From},
    gen_server:call(?MODULE, Request).

%% Fetch the member list held by the server.
members() ->
    Request = members,
    gen_server:call(?MODULE, Request).
207 |
%% Start the test server linked to the caller, registered locally under this
%% module's name, for the given local member.
start_link(LocalMember) ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, [LocalMember], []).

%% Stop the registered test server.
stop() ->
    Server = ?MODULE,
    gen_server:stop(Server).

%% gen_server callback: the server state is the value returned by
%% `swim_membership:new/5' for the local member.
init([LocalMember]) ->
    Membership = swim_membership:new(LocalMember, 5, 6, 500, 3),
    {ok, Membership}.
216 |
%% gen_server callback: apply the requested `swim_membership' operation.
%% The mutating operations return a pair whose second element is the new
%% membership; the first element is discarded and the caller gets `ok'.
%% `members' replies with the current member list.
handle_call({alive, Member, Incarnation}, _Caller, Current) ->
    {_, Updated} = swim_membership:alive(Member, Incarnation, Current),
    {reply, ok, Updated};
handle_call({suspect, Member, Incarnation, From}, _Caller, Current) ->
    {_, Updated} = swim_membership:suspect(Member, Incarnation, From, Current),
    {reply, ok, Updated};
handle_call({faulty, Member, Incarnation, From}, _Caller, Current) ->
    {_, Updated} = swim_membership:faulty(Member, Incarnation, From, Current),
    {reply, ok, Updated};
handle_call(members, _Caller, Current) ->
    {reply, swim_membership:members(Current), Current}.
229 |
%% Remaining gen_server callbacks: casts and raw messages are ignored, code
%% upgrades keep the state untouched and termination needs no cleanup.

handle_cast(_Ignored, Current) ->
    {noreply, Current}.

handle_info(_Ignored, Current) ->
    {noreply, Current}.

code_change(_FromVsn, Current, _Extra) ->
    {ok, Current}.

terminate(_Why, _Current) ->
    ok.
241 |
242 |
--------------------------------------------------------------------------------
/test/property_test/prop_swim_messages.erl:
--------------------------------------------------------------------------------
1 | %%% ----------------------------------------------------------------------------
2 | %%% Copyright (c) 2015-2017. All Rights Reserved.
3 | %%%
4 | %%% Licensed under the Apache License,
5 | %%% Version 2.0 (the "License"); you may not use this file except in compliance
6 | %%% with the License.
7 | %%% You may obtain a copy of the License at
8 | %%%
9 | %%% http://www.apache.org/licenses/LICENSE-2.0
10 | %%%
11 | %%% Unless required by applicable law or agreed to in writing, software
12 | %%% distributed under the License is distributed on an "AS IS" BASIS,
13 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %%% See the License for the specific language governing permissions and
15 | %%% limitations under the License.
16 | %%% ----------------------------------------------------------------------------
17 |
18 | -module(prop_swim_messages).
19 |
20 | -include_lib("proper/include/proper.hrl").
21 |
22 | -export([prop_encode_decode/0]).
23 |
24 | -import(swim_generators, [swim_message/0]).
25 |
%% Property: encoding a generated message, flattening it to a binary and
%% decoding it again yields exactly the original message. Test cases are
%% aggregated by the tag of the message header.
prop_encode_decode() ->
    ?FORALL({Header, _Events} = Message, swim_message(),
            begin
                Encoded = iolist_to_binary(swim_messages:encode(Message)),
                Decoded = swim_messages:decode(Encoded),
                aggregate([element(1, Header)], Message =:= Decoded)
            end).
32 |
--------------------------------------------------------------------------------
/test/swim_SUITE.erl:
--------------------------------------------------------------------------------
1 | -module(swim_SUITE).
2 |
3 | -include_lib("common_test/include/ct.hrl").
4 |
5 | -export([all/0]).
6 | -export([init_per_suite/1]).
7 | -export([end_per_suite/1]).
8 |
9 | -export([prop_swim_broadcasts/1]).
10 | -export([prop_swim_membership/1]).
11 | -export([prop_swim_keyring/1]).
12 | -export([prop_swim_messages/1]).
13 |
%% Common Test callback: the property-based test cases, in run order.
all() ->
    [prop_swim_membership, prop_swim_messages] ++
        [prop_swim_keyring, prop_swim_broadcasts].
21 |
%% Common Test callback: delegate suite setup to `ct_property_test' and
%% return the configuration it produces.
init_per_suite(Config) ->
    Prepared = ct_property_test:init_per_suite(Config),
    Prepared.

%% Common Test callback: nothing to clean up.
end_per_suite(_Unused) ->
    ok.
27 |
%% Test cases: each one builds a property from its property module and
%% checks it through `ct_property_test:quickcheck/2'.

prop_swim_broadcasts(Config) ->
    Property = prop_swim_broadcasts:prop_swim_broadcasts(),
    ct_property_test:quickcheck(Property, Config).

prop_swim_membership(Config) ->
    Property = prop_swim_membership:prop_membership(),
    ct_property_test:quickcheck(Property, Config).

prop_swim_keyring(Config) ->
    Property = prop_swim_keyring:prop_encryption(),
    ct_property_test:quickcheck(Property, Config).

prop_swim_messages(Config) ->
    Property = prop_swim_messages:prop_encode_decode(),
    ct_property_test:quickcheck(Property, Config).
39 |
--------------------------------------------------------------------------------
/test/swim_failure_SUITE.erl:
--------------------------------------------------------------------------------
1 | -module(swim_failure_SUITE).
2 |
3 | -include_lib("common_test/include/ct.hrl").
4 |
5 | -export([all/0]).
6 | -export([groups/0]).
7 | -export([init_per_suite/1]).
8 | -export([end_per_suite/1]).
9 | -export([init_per_group/2]).
10 | -export([end_per_group/2]).
11 |
12 | -export([ping/1]).
13 | -export([ping_req/1]).
14 |
%% Common Test callback: the suite consists of the `with_client' group.
all() ->
    Group = {group, with_client},
    [Group].

%% Common Test callback: `with_client' runs `ping' and `ping_req' with the
%% `shuffle' and `sequence' group properties.
groups() ->
    Properties = [shuffle, sequence],
    TestCases = [ping, ping_req],
    [{with_client, Properties, TestCases}].
20 |
%% Address used for the test client: 127.0.0.1, port 9200.
local_member() ->
    Loopback = {127,0,0,1},
    {Loopback, 9200}.

%% Address used for the swim application under test: 127.0.0.1, port 9000.
remote_member() ->
    Loopback = {127,0,0,1},
    {Loopback, 9000}.
26 |
%% Common Test callback: silence tty error logging, configure the swim
%% application with the remote member's port and a freshly generated 32-byte
%% key (base64 encoded), start it, and expose both members and the raw key
%% to the test cases through the configuration.
init_per_suite(Config) ->
    error_logger:tty(false),
    Key = crypto:strong_rand_bytes(32),
    {_Ip, Port} = RemoteMember = remote_member(),
    ok = application:set_env(swim, port, Port),
    ok = application:set_env(swim, key, base64:encode(Key)),
    ok = application:start(swim),
    Extra = [{local_member, local_member()},
             {remote_member, RemoteMember},
             {key, Key}],
    Extra ++ Config.
35 |
%% Common Test callback: stop the swim application and turn tty error
%% logging back on.
end_per_suite(_Unused) ->
    ok = application:stop(swim),
    _ = error_logger:tty(true),
    ok.
40 |
%% Common Test callback: start a test client for the local member with the
%% suite's key and store its pid under `client'.
init_per_group(with_client, Config) ->
    LocalMember = ?config(local_member, Config),
    Key = ?config(key, Config),
    {ok, Client} = swim_test_client:start(LocalMember, Key),
    [{client, Client} | Config].

%% Common Test callback: stop the test client started for the group.
end_per_group(with_client, Config) ->
    ok = swim_test_client:stop(?config(client, Config)),
    Config.
50 |
%% Test case: a ping with sequence 1 sent to the remote member must be
%% answered with an ack for the same sequence and target.
ping(Config) ->
    Remote = ?config(remote_member, Config),
    Expected = {ack, 1, Remote},
    Expected = call({ping, 1, Remote}, Config),
    ok.

%% Test case: a ping-req with sequence 2, sent to the remote member and
%% naming the local member as terminal, must come back as an ack for the
%% terminal.
ping_req(Config) ->
    Remote = ?config(remote_member, Config),
    Terminal = ?config(local_member, Config),
    Expected = {ack, 2, Terminal},
    Expected = call({ping_req, 2, Remote, Terminal}, Config),
    ok.

%% Send `Msg' through the test client stored in the configuration.
call(Msg, Config) ->
    Client = ?config(client, Config),
    swim_test_client:call(Client, Msg).
64 |
--------------------------------------------------------------------------------
/test/swim_generators.erl:
--------------------------------------------------------------------------------
1 | -module(swim_generators).
2 |
3 | -include_lib("proper/include/proper.hrl").
4 |
5 | -compile([export_all]).
6 |
%% Alias for ip_address/0.
g_ip_address() ->
    ip_address().

%% Generator for an IP address tuple: either four elements in 0..255 or
%% eight elements in 0..65535.
ip_address() ->
    V4 = tuple(lists:duplicate(4, range(0, 255))),
    V6 = tuple(lists:duplicate(8, range(0, 65535))),
    oneof([V4, V6]).

%% Alias for port_number/0.
g_port_number() ->
    port_number().

%% Generator for a port number in 0..65535.
port_number() ->
    Lowest = 0,
    Highest = 65535,
    range(Lowest, Highest).
21 |
%% Alias for incarnation/0.
g_incarnation() ->
    incarnation().

%% Generator for an incarnation number in 0..(1 bsl 32), both inclusive.
incarnation() ->
    Upper = 1 bsl 32,
    range(0, Upper).
27 |
%% Alias for membership_event/0.
g_membership_event() ->
    membership_event().

%% Generator for `{membership, Event}' where Event is a suspect, alive or
%% faulty event.
membership_event() ->
    Kinds = [suspect_event(), alive_event(), faulty_event()],
    ?LET(Inner, oneof(Kinds), {membership, Inner}).

%% Alias for suspect_event/0.
g_suspect_event() ->
    suspect_event().

%% Generator for `{suspect, Incarnation, Member, From}'.
suspect_event() ->
    ?LET({Inc, Suspected, Reporter},
         {incarnation(), member(), member()},
         {suspect, Inc, Suspected, Reporter}).

%% Alias for alive_event/0.
g_alive_event() ->
    alive_event().

%% Generator for `{alive, Incarnation, Member}'.
alive_event() ->
    ?LET({Inc, Subject},
         {incarnation(), member()},
         {alive, Inc, Subject}).

%% Alias for faulty_event/0.
g_faulty_event() ->
    faulty_event().

%% Generator for `{faulty, Incarnation, Member, From}'.
faulty_event() ->
    ?LET({Inc, Failed, Reporter},
         {incarnation(), member(), member()},
         {faulty, Inc, Failed, Reporter}).
59 |
%% Alias for user_event/0.
g_user_event() ->
    user_event().

%% Generator for `{user, Binary}' with an arbitrary binary payload.
user_event() ->
    ?LET(Payload, binary(), {user, Payload}).

%% Alias for swim_event/0.
g_swim_event() ->
    swim_event().

%% Generator for either a user event or a membership event.
swim_event() ->
    Kinds = [user_event(), membership_event()],
    oneof(Kinds).
71 |
%% Alias for sequence/0.
g_sequence() ->
    sequence().

%% Generator for a sequence number in 0..(1 bsl 32), both inclusive.
sequence() ->
    Upper = 1 bsl 32,
    range(0, Upper).

%% Alias for member/0.
g_member() ->
    member().

%% Generator for a member: an `{IpAddress, PortNumber}' tuple.
member() ->
    Parts = [ip_address(), port_number()],
    tuple(Parts).
83 |
%% Alias for swim_events/0.
g_swim_events() ->
    swim_events().

%% Size-aware generator for a list of swim events.
swim_events() ->
    ?SIZED(CurrentSize, swim_events(CurrentSize)).

%% Generator for a list of swim events. Above size 256 the list generator is
%% resized to half of the current size (rounded); otherwise it is used as is.
swim_events(Size) ->
    Events = list(swim_event()),
    if
        Size > 256 ->
            resize(round(Size / 2), Events);
        true ->
            Events
    end.
94 |
%% Alias for ack/0.
g_ack() ->
    ack().

%% Generator for `{{ack, Sequence, Target}, Events}'.
ack() ->
    message_of(ack).

%% Alias for nack/0.
g_nack() ->
    nack().

%% Generator for `{{nack, Sequence, Target}, Events}'.
nack() ->
    message_of(nack).

%% Alias for ping/0.
g_ping() ->
    ping().

%% Generator for `{{ping, Sequence, Target}, Events}'.
ping() ->
    message_of(ping).

%% Alias for ping_req/0.
g_ping_req() ->
    ping_req().

%% Generator for `{{ping_req, Sequence, Target}, Events}'.
ping_req() ->
    message_of(ping_req).

%% Alias for swim_message/0.
g_swim_message() ->
    swim_message().

%% Generator for any of the four message kinds.
swim_message() ->
    Kinds = [ack(), ping(), ping_req(), nack()],
    oneof(Kinds).

%% Shared shape of all message generators: a header tagged with `Tag' that
%% carries a sequence number and a target member, paired with a list of
%% events.
message_of(Tag) ->
    ?LET({Seq, Target, Events}, {sequence(), member(), swim_events()},
         {{Tag, Seq, Target}, Events}).
128 |
--------------------------------------------------------------------------------
/test/swim_test_client.erl:
--------------------------------------------------------------------------------
1 | -module(swim_test_client).
2 | -behavior(gen_server).
3 |
4 | -export([start/2]).
5 | -export([stop/1]).
6 | -export([call/2]).
7 |
8 | -export([init/1]).
9 | -export([handle_call/3]).
10 | -export([handle_cast/2]).
11 | -export([handle_info/2]).
12 | -export([code_change/3]).
13 | -export([terminate/2]).
14 |
15 | -record(state, {
16 | local_member,
17 | keyring,
18 | socket,
19 | requests
20 | }).
21 |
%% Start an unlinked, unregistered test client for `LocalMember' using `Key'.
start(LocalMember, Key) ->
    InitArgs = [LocalMember, Key],
    gen_server:start(?MODULE, InitArgs, []).

%% Stop the test client `Pid'.
stop(Client) ->
    gen_server:stop(Client).
27 |
%% Send `Msg' to the test client `Pid' and wait up to 500 ms for the reply.
%%
%% Returns the client's reply, or the atom `timeout' when no reply arrives
%% in time. Only the timeout exit raised by `gen_server:call/3' is turned
%% into `timeout'; any other failure (for example a client that is not
%% running) propagates so that it is not misreported as a timeout.
call(Pid, Msg) ->
    try
        gen_server:call(Pid, Msg, 500)
    catch
        exit:{timeout, _Call} ->
            timeout
    end.
35 |
%% gen_server callback: build a keyring from `Key', open an active, binary
%% UDP socket on the local member's port and start with no pending requests.
init([{_Ip, Port} = LocalMember, Key]) ->
    Keyring = swim_keyring:new([Key]),
    SocketOpts = [binary, {active, true}],
    {ok, Socket} = gen_udp:open(Port, SocketOpts),
    State = #state{local_member = LocalMember,
                   keyring = Keyring,
                   socket = Socket,
                   requests = #{}},
    {ok, State}.
40 |
%% gen_server callback: send a ping or ping-req to the given address and
%% remember the caller under the sequence number. No reply is sent here.
handle_call({ping, Sequence, {_Ip, _Port} = Target}, From, State) ->
    send_request({ping, Sequence, Target}, Sequence, Target, From, State);
handle_call({ping_req, Sequence, {_Ip, _Port} = Target, Terminal}, From, State) ->
    send_request({ping_req, Sequence, Terminal}, Sequence, Target, From, State).

%% Encode `Header' with no events, encrypt it with the client's keyring,
%% send it over UDP to `{Ip, Port}' and record `From' as the pending caller
%% for `Sequence'.
send_request(Header, Sequence, {Ip, Port}, From, State) ->
    #state{socket = Socket, keyring = Keyring, requests = Pending} = State,
    Encoded = swim_messages:encode({Header, []}),
    Payload = swim_keyring:encrypt(Encoded, Keyring),
    ok = gen_udp:send(Socket, Ip, Port, Payload),
    {noreply, State#state{requests = maps:put(Sequence, From, Pending)}}.

%% gen_server callback: casts are ignored.
handle_cast(_Ignored, State) ->
    {noreply, State}.
54 |
%% gen_server callback for incoming UDP packets and any other raw message.
%%
%% A packet that fails verification is dropped. A decrypted packet is
%% decoded and dispatched; any exception raised while decoding or handling
%% it is swallowed and the state is left unchanged. Everything that is not a
%% UDP packet is ignored.
handle_info({udp, _Socket, Ip, InPortNo, Packet}, State) ->
    case swim_keyring:decrypt(Packet, State#state.keyring) of
        {ok, PlainText} ->
            try
                handle_plaintext(PlainText, {Ip, InPortNo}, State)
            catch
                _:_ ->
                    {noreply, State}
            end;
        {error, failed_verification} ->
            {noreply, State}
    end;
handle_info(_Info, State) ->
    {noreply, State}.

%% Decode a decrypted packet and dispatch on its message header; the events
%% carried by the packet are ignored.
handle_plaintext(PlainText, Sender, State) ->
    {Message, _Events} = swim_messages:decode(PlainText),
    handle_message(Message, Sender, State).

%% An ack is forwarded to the caller waiting on its sequence number, if
%% there is one, and that caller is removed from the pending requests.
handle_message({ack, Sequence, _Target} = Reply, _Sender, State) ->
    case maps:take(Sequence, State#state.requests) of
        {From, Requests} ->
            gen_server:reply(From, Reply),
            {noreply, State#state{requests = Requests}};
        error ->
            {noreply, State}
    end;
%% A ping is answered with an ack for the same sequence and target, sent
%% back to the address the packet came from.
handle_message({ping, Sequence, Target}, {Ip, Port}, State) ->
    Ack = swim_messages:encode({{ack, Sequence, Target}, []}),
    Payload = swim_keyring:encrypt(Ack, State#state.keyring),
    ok = gen_udp:send(State#state.socket, Ip, Port, Payload),
    {noreply, State};
%% Every other message is ignored.
handle_message(_Other, _Sender, State) ->
    {noreply, State}.
86 |
%% gen_server callback: code upgrades keep the state untouched.
code_change(_FromVsn, Current, _Extra) ->
    {ok, Current}.

%% gen_server callback: no explicit cleanup is performed on termination.
terminate(_Why, _Current) ->
    ok.
92 |
--------------------------------------------------------------------------------