├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md ├── doc ├── architecture.md └── user_guide.md ├── src ├── cluster │ ├── metrics.rs │ ├── mod.rs │ ├── msg.rs │ ├── server.rs │ └── status.rs ├── correlation_id.rs ├── envelope.rs ├── errors.rs ├── executor │ ├── executor.rs │ ├── metrics.rs │ ├── mod.rs │ ├── msg.rs │ └── status.rs ├── histogram.rs ├── lib.rs ├── members.rs ├── metrics.rs ├── msg.rs ├── node.rs ├── node_id.rs ├── pid.rs ├── process.rs ├── serialize │ ├── mod.rs │ ├── msgpack.rs │ ├── protobuf.rs │ └── serialize.rs ├── service │ ├── connection_handler.rs │ ├── mod.rs │ ├── service.rs │ ├── service_handler.rs │ └── tcp_server_handler.rs └── timer_wheel.rs └── tests ├── join_leave.rs ├── multi_node_chain_replication.rs ├── single_node_chain_replication.rs ├── timeout_tests.rs └── utils ├── api_server.rs ├── messages.rs ├── mod.rs └── replica.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | .* 4 | *~ 5 | \#* 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - nightly 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rabble" 3 | version = "0.4.1" 4 | authors = ["Andrew J. Stone "] 5 | description = "A library for creating location transparent actor based systems" 6 | repository = "https://github.com/andrewjstone/rabble" 7 | keywords = ["cluster", "networking", "distributed", "actor", "erlang"] 8 | license = "Apache-2.0" 9 | 10 | [features] 11 | # reexport no_timerfd feature from amy 12 | no_timerfd = ["amy/no_timerfd"] 13 | 14 | [dependencies] 15 | amy = "^0.7.2" 16 | orset = "0.2" 17 | time = "0.1" 18 | net2 = "0.2" 19 | error-chain = "0.5" 20 | libc = "0.2" 21 | log = "0.3" 22 | slog = {version = "1", features = ["max_level_trace"]} 23 | slog-stdlog = "1" 24 | slog-term = "1.1" 25 | slog-envlogger = "0.5" 26 | ferris = "0.1" 27 | protobuf = "1.0.24" 28 | serde = "1.0" 29 | serde_derive = "1.0" 30 | rmp-serde = "0.13" 31 | 32 | [dev-dependencies] 33 | assert_matches = "1.0" 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build 2 | Status](https://travis-ci.org/andrewjstone/rabble.svg?branch=master)](https://travis-ci.org/andrewjstone/rabble) 3 | 4 | [API Documentation](https://docs.rs/rabble) 5 | 6 | ### Usage 7 | 8 | Add the following to your `Cargo.toml` 9 | 10 | ```toml 11 | [dependencies] 12 | rabble = "0.3" 13 | ``` 14 | 15 | Add this to your crate root 16 | 17 | ```rust 18 | extern crate rabble; 19 | ``` 20 | # Description 21 | Rabble provides location independent actor communication over a fully connected mesh of nodes. More 22 | information can be found in the [architecture 23 | doc](https://github.com/andrewjstone/rabble/blob/master/doc/architecture.md) and [user 24 | guide](https://github.com/andrewjstone/rabble/blob/master/doc/user_guide.md). 25 | -------------------------------------------------------------------------------- /doc/architecture.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This document details the internal structure and design of rabble, as well as justifications for 3 | some of the design decisions. It highlights the major abstractions and how they fit into the bigger 4 | picture of building a large scale clustered network application. An attempt will be made to clarify 5 | where certain decisions were made for expedience, and what may be done in the future to enhance 6 | Rabble. 7 | 8 | Rabble provides a location transparent actor system via a fully connected mesh of 9 | [nodes](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs) identified by 10 | a unique [NodeId](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node_id.rs). 11 | Nodes are joined together to form clusters upon which actors can run and communicate via sending 12 | messages. Rabble supports two types of actors in the system: [lightweight 13 | processes](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/process.rs) and 14 | thread based 15 | [services](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/service.rs). Each 16 | actor has a globally unique 17 | [Pid](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/pid.rs) used to identify 18 | and send messages to it. 19 | 20 | # Abstractions 21 | 22 | ### Messages 23 | Rust is a statically typed programming language and contains statically typed channels for 24 | communication between threads. Furthermore, any objects, such as processes, stored in a collection, 25 | must be of the same type. This presents a unique challenge for building an actor system, since 26 | traditionally actor systems allow sending and receiving arbitrary messages. Rabble was therefore 27 | presented with 2 primary choices for messaging between actors. Use a single type of message for all 28 | actors or use dynamically typed messages via [Any](https://doc.rust-lang.org/std/any/) or [Trait 29 | Objects](https://doc.rust-lang.org/stable/book/trait-objects.html). 30 | 31 | Any dynamic type sent over a channel must be boxed and therefore requires an allocation, while a 32 | static type can simply be copied. Furthermore, dynamic types require runtime reflection. 
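To make the trade-off concrete, here is a hypothetical sketch (simplified types, not rabble's actual definitions) contrasting a single parameterized message enum with dynamically typed, boxed messages:

```Rust
use std::any::Any;

// Option 1: a single statically typed, parameterized message enum. `T` is the one
// user-defined message type shared by all actors; no boxing or downcasting is needed.
#[derive(Debug, Clone, PartialEq)]
enum Msg<T> {
    User(T),
    Timeout,
}

// Option 2: dynamically typed messages. Every message is boxed (heap allocated) and
// each receiver must downcast to the concrete types it knows about at runtime.
fn handle_dynamic(msg: Box<dyn Any>) {
    if let Some(text) = msg.downcast_ref::<String>() {
        println!("got a string: {}", text);
    } else {
        println!("got a message this actor does not understand");
    }
}

fn main() {
    let static_msg: Msg<String> = Msg::User("increment".to_string());
    println!("{:?}", static_msg);
    let _ = Msg::<String>::Timeout;

    handle_dynamic(Box::new("increment".to_string()));
}
```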
Dynamic types also 33 | add to the implementation complexity of the system, because serialization for sending between nodes 34 | requires direct implementation, rather than derivation via compiler plugins. However, these types do 35 | provide open extensibility and some sort of data hiding, since actors will only attempt to downcast 36 | messages they know about. 37 | 38 | The performance and complexity cost of dynamic types in Rust, as well as the loss of static type 39 | checking capabilities, outweighs the benefits of open extensibility. Therefore every message sent 40 | between actors in rabble is a statically typed, parameterized 41 | [Msg](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/msg.rs). The `User(T)` 42 | variant of `Msg` contains the user defined static type, which, once again, is shared among *all* 43 | actors in the system. 44 | 45 | Now, there is a caveat to the above description. If a user really desires open extensibility and 46 | doesn't care about the performance penalty or implementation complexity, they can use boxed types, 47 | as long as they implement the required traits manually, namely: `Debug, Clone, Eq, 48 | PartialEq, Serialize, and Deserialize`. 49 | 50 | Lastly, note that this restriction to a single type for messages only applies to messages sent 51 | between actors. Client APIs may use their own message types. 52 | 53 | 54 | ### Processes 55 | Processes are intended as the primary actor type to be utilized when building systems 56 | upon rabble. Processes implement the `Process` trait, which consists of an associated type, `Msg`, 57 | and a single method, `handle`, shown below. The associated type is the type parameter to the `Msg` 58 | enum, which is the single type shared among actors as described above. 59 | 60 | ```Rust 61 | pub trait Process : Send { 62 | type Msg: Serialize + Deserialize + Debug + Clone; 63 | fn handle(&mut self, 64 | msg: Msg<Self::Msg>, 65 | from: Pid, 66 | correlation_id: Option<CorrelationId>, 67 | output: &mut Vec<Envelope<Self::Msg>>); 68 | } 69 | ``` 70 | 71 | Processes contain internal state that can be mutated when a message is handled. Processes can 72 | only respond to messages, and do not generate output without input. Any output messages to actors 73 | in response to the input message are not sent directly over channels but are instead packaged into 74 | [envelopes](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/envelope.rs) and 75 | pushed onto an output Vec. 76 | 77 | The choice to return envelopes inside a mutable Vec of envelopes is an interesting one, so a short 78 | discussion of why this was chosen is in order. A key goal of rabble is to enable easy testability of 79 | individual processes and protocols involving those processes. While other actor systems allow 80 | processes to directly send messages while running a callback, this side-effect behavior is very hard 81 | to test. In a traditional actor language like Erlang, these side effects manifest as non-determinism 82 | in the ordering of messages due to the scheduling behavior of the processes. Re-running a failing test 83 | often results in a different messaging order, making the failure hard to reproduce and the root cause 84 | hard to discover.
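Rabble takes a different approach, illustrated by the stand-alone sketch below (the `Pid`, `Envelope`, and `Process` definitions here are simplified stand-ins, not rabble's actual types): a process only mutates its own state and pushes outgoing envelopes into a buffer supplied by the caller, so a test can deliver, drop, or reorder those envelopes however it likes.

```Rust
// Simplified stand-ins for rabble's Pid, Envelope, and Process types.
#[derive(Debug, Clone, PartialEq)]
struct Pid(&'static str);

#[derive(Debug, Clone)]
struct Envelope {
    to: Pid,
    from: Pid,
    msg: u64,
}

trait Process {
    // Handle one message, mutate internal state, and push any replies into `output`.
    fn handle(&mut self, msg: u64, from: Pid, output: &mut Vec<Envelope>);
}

struct Echo {
    pid: Pid,
    seen: u64,
}

impl Process for Echo {
    fn handle(&mut self, msg: u64, from: Pid, output: &mut Vec<Envelope>) {
        self.seen += 1;
        output.push(Envelope { to: from, from: self.pid.clone(), msg: msg + 1 });
    }
}

fn main() {
    // The "test" plays scheduler: it decides which envelope is delivered next,
    // and could just as easily drop, delay, or reorder them.
    let mut a = Echo { pid: Pid("a"), seen: 0 };
    let mut pending = vec![Envelope { to: Pid("a"), from: Pid("test"), msg: 0 }];
    while let Some(env) = pending.pop() {
        // The test routes by Pid; in this tiny example everything goes to "a".
        assert_eq!(env.to, Pid("a"));
        let mut output = Vec::new();
        a.handle(env.msg, env.from, &mut output);
        // The test now owns the produced envelopes: inspect, reorder, or drop them deterministically.
        println!("{:?}", output);
    }
    assert_eq!(a.seen, 1);
}
```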
By making the interface to each process a function call that only modifies 85 | internal state and returns envelopes, we can carefully control the ordering of all messages between 86 | processes by a test and can even allow dropping, delaying or re-ordering of those messages in ways 87 | specific to the test itself. This allows for full determinism specified by the test, and allows 88 | building interactive debuggers that can literally step through the messages sent in a system. Note 89 | that while the order of test messages is deterministic and tests are repeatable, covering the entire 90 | state space is still just as hard as in traditional actor systems. Therefore, long-running 91 | simulations of multiple schedules in a [quickcheck](https://github.com/BurntSushi/quickcheck) like 92 | manner, or exhaustive [model checking](https://en.wikipedia.org/wiki/Model_checking) with or without 93 | [partial order reduction](https://en.wikipedia.org/wiki/Partial_order_reduction) of state space is 94 | recommended. 95 | 96 | ### Executor 97 | Each process receives a messages sent to it when its `handle` method gets called, and returns any 98 | output envelopes. But processes are just objects and do not have their own thread of control, so 99 | what is the mechanism that calls a process's handle method and routes responses to other processes? 100 | This component is called the 101 | [Executor](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/executor.rs) and is 102 | responsible for routing all messages on a single node to their corresponding processes, and [calling 103 | the process's handle 104 | method](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/executor.rs#L117). Any 105 | messages destined for actors on another node will be sent over a channel to the cluster server 106 | which will forward the message. The cluster server will be described in the next section. 107 | 108 | For implementation expediency and practical purposes, the executor currently runs in a single 109 | thread. All processes are stored in a [HashMap keyed by their 110 | Pids](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/executor.rs#L21). A 111 | single async channel receiver receives 112 | [ExecutorMsg](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/executor_msg.rs)s 113 | in a [loop](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/executor.rs#L56) 114 | that contain both requests for the executor, as well as envelopes that need to be sent to local 115 | actors in the system. Note that the executor not only calls processes' handle method, it also 116 | forwards envelopes over channels to any service that has it's Pid registered. Services will be 117 | described in a later section. 118 | 119 | ### Cluster Server 120 | The [cluster 121 | server](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/cluster_server.rs) 122 | maintains TCP connections to other nodes and manages cluster membership state. It serves as a bridge 123 | for routing messages between actors on different nodes. It [receives 124 | messages](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs#L52-L73) 125 | from the executor or 126 | service threads which need to be serialized and sent to processes on other nodes when the 127 | appropriate peer sockets are writable. 
It also receives [notifications from the kernel 128 | poller](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/cluster_server.rs#L175-L192) that 129 | sockets are available to be read or written, or a timer has fired. [Peer sockets are read from, 130 | messages are deserialized, and then forwarded to the appropriate local actor via the executor 131 | channel](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/cluster_server.rs#L215-L260). 132 | Timer notifications are likewise forwarded to local processes. Services manage their own timers. 133 | 134 | Finally, there needs to be some way of establishing connections and configuring the cluster network. 135 | A [cluster membership 136 | API](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs#L52-L73) exists 137 | as part of the [Node](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs) object that sends messages to the cluster server instructing it to change its 138 | membership. Connections will then be established or torn down asynchronously. [Cluster 139 | status](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/cluster_status.rs) 140 | information is retrieved in the [same 141 | manner](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs#L111-L117). 142 | Note that the cluster membership API is not run in it's own thread, but is run in the context of the 143 | caller. 144 | 145 | ### Services 146 | For constructing I/O bound network protocols, lightweight processes are an excellent choice. 147 | However, since all processes are executed inside a single thread, doing a lot of CPU intensive work, 148 | or making a blocking system call will delay other processes from running and cause latency spikes. 149 | What we need is a way for processes to outsource blocking or expensive operations to other threads. 150 | [Services](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/service.rs) provide 151 | this mechanism. Services are also actors in the system and can send and receive messages with 152 | processes and other services. They enable this by [registering a sender and a Pid with the 153 | executor](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/service.rs#L37). The 154 | executor can then appropriately [route messages to services instead of processes based on the 155 | destination 156 | Pid](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/executor.rs#L106-L109). 157 | 158 | Services are capable of interacting directly with the network, as well as actors. This enables users 159 | to implement admin and API servers to manage and interact with applications running across a rabble 160 | cluster. Note that because services can access the network directly, they can use whatever protocol 161 | or message format the user desires, and do not need to use the same message type as actors. 162 | 163 | In order to create a service, a user must implement a [service 164 | handler](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/service_handler.rs) 165 | to specialize the service. 
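Conceptually, a service is just a thread that drains envelopes from a channel it has registered with the executor, and it is free to block or do heavy work while doing so. The following stand-alone sketch captures that idea with simplified types; it is illustrative only and does not use rabble's actual ServiceHandler API:

```Rust
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread;

// Simplified stand-in for an envelope addressed to this service.
#[derive(Debug)]
struct Envelope {
    from: String,
    msg: String,
}

// A "service": its own OS thread, free to block or burn CPU without
// stalling the lightweight processes running on the executor thread.
fn spawn_service(rx: Receiver<Envelope>) -> thread::JoinHandle<()> {
    thread::spawn(move || {
        while let Ok(envelope) = rx.recv() {
            // Blocking or expensive work happens here, e.g. file I/O or hashing.
            println!("service handling {:?}", envelope);
        }
        // Channel closed: all senders dropped, so the service shuts down.
    })
}

fn main() {
    let (tx, rx): (Sender<Envelope>, Receiver<Envelope>) = channel();
    let handle = spawn_service(rx);
    // In rabble the executor would route envelopes here based on the registered Pid;
    // in this sketch we just send one directly.
    tx.send(Envelope { from: "counter1".to_string(), msg: "flush".to_string() }).unwrap();
    drop(tx);
    handle.join().unwrap();
}
```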
The simplest type of service handler, the [thread 166 | handler](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/thread_handler.rs), 167 | only handles envelopes from other actors and performs no network interaction. This type of handler 168 | is useful for running expensive computations or performing file operations. An example 169 | implementation can be found 170 | [here](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/tests/basic.rs#L26-L29) 171 | Note that the callback receives a node as well as an envelope. This allows it to send replies or 172 | notifications to other actors. 173 | 174 | The second major use for a service, as described above, is for server endpoints. While any network 175 | protocol can be used for this, TCP is a common choice. Therefore a [TCP 176 | handler](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/tcp_server_handler.rs) 177 | has already been implemented in Rabble. This handler supports generic encoding and decoding of 178 | messages by implementing the [Serialize 179 | trait](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/serialize.rs). 180 | A [MsgPack](http://msgpack.org/index.html) based implementation is provided 181 | [here](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/msgpack_serializer.rs). 182 | 183 | In this TCP handler, each connection is independent of other connections, and is structured so that 184 | the user only has to provide callback functions to send and receive messages to and from actors and 185 | the network. If a user wants to use the TCP handler provided by rabble they must implement a 186 | [connection 187 | handler](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/connection_handler.rs). 188 | An example of implementing a connection handler for MsgPack based messages in a chain replication 189 | application exists 190 | [here](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/tests/utils/api_server.rs#L45-L109). 191 | Note that the connection handler trait is not specific to TCP and can be re-used for other 192 | connection based protocols such as SCTP. 193 | 194 | # Limitations 195 | 196 | * Currently there is no backpressure provided by the system. An 197 | [issue](https://github.com/andrewjstone/rabble/issues/2) has been opened. 198 | * Operability is limited by the lack of metrics and status information. An 199 | [issue](https://github.com/andrewjstone/rabble/issues/4) has been opened 200 | for this as well. 201 | * Pids and NodeIds use strings for identifiers. Due to the amount of comparisons on both of these 202 | types, this is extremely inefficient and wasteful. Both Pids and NodeIds should be converted to 203 | [Atoms](http://stackoverflow.com/questions/36023947/how-do-erlang-atoms-work/36025280). An 204 | [issue](https://github.com/andrewjstone/rabble/issues/5) has been created. 205 | 206 | -------------------------------------------------------------------------------- /doc/user_guide.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | Rabble is useful for building distributed, clustered applications where actors can run on different 3 | [Nodes](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs) and 4 | communicate over the network. 
This allows for easier implementation of distributed algorithms based 5 | around asynchronous message passing between processes. Actors in rabble are primarily lightweight 6 | [processes](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/process.rs) that 7 | receive and send messages. Thread-based 8 | [services](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/service.rs) provide 9 | a way to run computation-heavy tasks, interact with the file system, or implement an API server 10 | while retaining the capability to send to and receive messages from other processes and services. 11 | 12 | This guide will show how to get started building a distributed system in rabble from the ground up. 13 | The reader will learn how to create a node, join nodes together, spawn processes to act as peers 14 | in a distributed system, and create an API service to allow interaction with that system. 15 | 16 | # What are we building? 17 | Our example should be complete enough to show off most features of rabble, while not shrouding the 18 | basics with the complexity of the algorithm implementation. In light of this, 19 | we will build a very simple and utterly fault-intolerant replicated counter. The service will have 3 nodes, 20 | with a replica on each node. The first node is the primary node, and has a TCP server that can take 21 | requests to either increment the counter or get the current count. When an increment request is received, it 22 | will be sent to the primary replica on the same node, which will then forward the request to the two 23 | backup replicas and wait for the replies from both replicas. When the primary replica has received both replies, it 24 | will send a message to the TCP server so it can respond to the client. Requests for the 25 | current count are answered directly from the primary replica. 26 | 27 | Note that this example is simplified in some major ways, and is an absolutely terrible way to build 28 | a distributed counter. It assumes that: 29 | 30 | 1. The network is reliable. Nodes will never become partitioned or lose connectivity. 31 | 2. The network is not asynchronous, and messages are sent in bounded time. In the world of this 32 | example, any message communication will occur without delay or timeout. 33 | 3. Nodes will never crash. Replicas will always maintain the same position in the primary/backup 34 | relationship and will always have up-to-date data. 35 | 36 | It probably assumes a bunch more 37 | [fallacies](http://www.lasr.cs.ucla.edu/classes/188_winter15/readings/fallacies.pdf) than those, but 38 | that's enough to show that you shouldn't build a production system in this manner, and that this is 39 | only an example to explain how to use Rabble. 40 | 41 | # Creating your nodes 42 | Each node needs a unique 43 | [NodeId](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node_id.rs). A node 44 | also needs a message type for messages sent between actors. All actors can only send and receive a 45 | single message type. You can read more about why 46 | [here](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/doc/architecture.md#messages). 47 | A node can then be started with a call to 48 | [rabble::rouse](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/lib.rs#L80).
49 | 50 | ```Rust 51 | use rabble::NodeId; 52 | 53 | // The message shipped between actors in the system. It must implement these derived traits. 54 | // Serialize and Deserialize provide serialization capability to arbitrary formats. 55 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 56 | enum CounterMsg { 57 | Increment, 58 | Ok, // Backup successfully received the Increment 59 | GetCount, 60 | Count(usize), 61 | } 62 | 63 | let node_ids = create_node_ids(3); 64 | 65 | // Each call to rabble::rouse spawns a few threads and returns their `JoinHandle`s along with the node. 66 | // The handles should be joined at some point later in the code. None as the second parameter to 67 | // rouse means just use the standard logger. 68 | let (nodes, handles) = node_ids.clone().into_iter().fold((Vec::new(), Vec::new()), |(mut nodes, mut handles), node_id| { 69 | let (node, handle_list) = rabble::rouse::<CounterMsg>(node_id, None); 70 | nodes.push(node); 71 | handles.extend(handle_list); 72 | (nodes, handles) 73 | }); 74 | 75 | // Create N node ids with names node1,node2,... and unique IP addresses. Don't create more than 9 :D 76 | pub fn create_node_ids(n: usize) -> Vec<NodeId> { 77 | (1..n + 1).map(|n| { 78 | NodeId { 79 | name: format!("node{}", n), 80 | addr: format!("127.0.0.1:1100{}", n) 81 | } 82 | }).collect() 83 | } 84 | ``` 85 | 86 | # Creating and starting 3 replicas 87 | 88 | We now have 3 nodes up and running. We want to implement a replica process and then start one on 89 | each node. 90 | 91 | First, let's create 3 Pids, one for each process, using the `node_ids` created previously. Note that 92 | the `group` member of a pid can be used for a variety of reasons including multi-tenancy. For now, 93 | let's just leave it blank. 94 | 95 | ```Rust 96 | let pids: Vec<Pid> = ["replica1", "replica2", "replica3"].iter().zip(node_ids.iter()).map(|(name, node_id)| { 97 | Pid { 98 | name: name.to_string(), 99 | group: None, 100 | node: node_id.clone() 101 | } 102 | }).collect(); 103 | ``` 104 | 105 | Now we need to define our replica type and implement the counter process. Note that the messages 106 | received by a process are of type 107 | [Msg](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/msg.rs), which is 108 | parameterized by `CounterMsg`. This allows receipt of system data as well as user-defined types. 109 | For now, though, we will just concern ourselves with the `User(T)` variant of the `Msg` enum. 110 | Additionally, each message has a corresponding 111 | [CorrelationId](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/correlation_id.rs) 112 | used to match requests with responses. Any received messages should put the correlation id in the 113 | outgoing envelopes.
114 | 115 | ```Rust 116 | pub struct Counter { 117 | pid: Pid, 118 | primary: bool, 119 | backups: Vec<Pid>, 120 | count: usize, 121 | output: Vec<Envelope<CounterMsg>>, 122 | 123 | // We have to wait for both backup replies before responding to the client 124 | backup_replies: HashMap<CorrelationId, usize> 125 | } 126 | 127 | impl Counter { 128 | pub fn new(pid: Pid, primary: Pid, backups: Vec<Pid>) -> Counter { 129 | // Size the output vector for the expected number of outgoing messages 130 | let size = if pid == primary { 131 | 2 132 | } else { 133 | 1 134 | }; 135 | 136 | Counter { 137 | pid: pid, 138 | primary: primary == pid, 139 | backups: backups, 140 | count: 0, 141 | output: Vec::with_capacity(size), 142 | backup_replies: HashMap::new() 143 | } 144 | } 145 | } 146 | 147 | impl Process for Counter { 148 | // Each process needs a type. We defined it above. It's the one we used to parameterize the call 149 | // to rabble::rouse() 150 | type Msg = CounterMsg; 151 | 152 | // Each process must implement a single method, `handle`. 153 | fn handle(&mut self, msg: Msg<CounterMsg>, 154 | from: Pid, 155 | correlation_id: Option<CorrelationId>, 156 | output: &mut Vec<Envelope<CounterMsg>>) 157 | { 158 | match msg { 159 | Msg::User(CounterMsg::Increment) => { 160 | self.count += 1; 161 | if self.primary { 162 | // Send the increment to the two backups 163 | // For now assume correlation_id is a `Some` 164 | self.backup_replies.insert(correlation_id.as_ref().unwrap().clone(), 0); 165 | for b in &self.backups { 166 | let msg = Msg::User(CounterMsg::Increment); 167 | let envelope = Envelope::new(b.clone(), self.pid.clone(), msg, correlation_id.clone()); 168 | output.push(envelope); 169 | } 170 | } else { 171 | // Respond to the primary 172 | let reply = Msg::User(CounterMsg::Ok); 173 | let envelope = Envelope::new(from, self.pid.clone(), reply, correlation_id); 174 | output.push(envelope); 175 | } 176 | }, 177 | Msg::User(CounterMsg::GetCount) => { 178 | // Only the primary gets this message 179 | let reply = Msg::User(CounterMsg::Count(self.count)); 180 | let envelope = Envelope::new(from, self.pid.clone(), reply, correlation_id); 181 | output.push(envelope); 182 | }, 183 | Msg::User(CounterMsg::Ok) => { 184 | // Increment the backup_replies. Once we have received both, reply to the client 185 | // Do this in a block to limit the borrow scope 186 | let count = { 187 | let count = self.backup_replies.get_mut(correlation_id.as_ref().unwrap()).unwrap(); 188 | *count += 1; 189 | *count 190 | }; 191 | 192 | if count == 2 { 193 | self.backup_replies.remove(correlation_id.as_ref().unwrap()); 194 | // Send to the original requester, not the sender. For now assume the correlation_id 195 | // is a Some(id). It has to be for any chained req/response to work properly. 196 | let to = correlation_id.as_ref().unwrap().pid.clone(); 197 | let reply = Msg::User(CounterMsg::Ok); 198 | let envelope = Envelope::new(to, self.pid.clone(), reply, correlation_id); 199 | output.push(envelope); 200 | } 201 | }, 202 | _ => unreachable!() 203 | } 204 | } 205 | } 206 | ``` 207 | 208 | Now let's start the replicas so that they can receive and send messages.
209 | 210 | ```Rust 211 | let primary = pids[0].clone(); 212 | let backups = vec![pids[1].clone(), pids[2].clone()]; 213 | for (i, pid) in pids.iter().enumerate() { 214 | // Processes can be any type that implements Process, so create a trait object with Box::new() 215 | let replica = Box::new(Counter::new(pid.clone(), primary.clone(), backups.clone())); 216 | // Start the replica on the correct node 217 | nodes[i].spawn(pid, replica).unwrap(); 218 | } 219 | ``` 220 | 221 | # Join the nodes 222 | We need to join the nodes together into a cluster. Note that this is an operation that should most 223 | likely be exposed to the end user via an Admin server. For now though, we are just going to use the 224 | Rabble [Node 225 | API](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/node.rs#L52-L65) 226 | to do the join. 227 | 228 | In order to know when the nodes have been joined, we need to have some way of checking the cluster 229 | state and getting responses back to our requests. Normally this would be done in an admin service, 230 | but for now we can just register a channel for our test and poll on it. 231 | 232 | ```Rust 233 | nodes[0].join(&nodes[1].id).unwrap(); 234 | nodes[0].join(&nodes[2].id).unwrap(); 235 | 236 | // Create a Pid for our "test service". This is used to register a channel so that we can receive 237 | // responses to requests. 238 | let test_pid = Pid { 239 | name: "test-runner".to_string(), 240 | group: None, 241 | node: node_ids[0].clone() 242 | }; 243 | 244 | // We create an amy channel so that we can pretend this test is a service. 245 | // We register the sender and our pid with node1 so that we can check the responses to admin calls 246 | // like node.cluster_status(). 247 | let mut poller = Poller::new().unwrap(); 248 | let (test_tx, test_rx) = poller.get_registrar().channel().unwrap(); 249 | nodes[0].register_service(&test_pid, &test_tx).unwrap(); 250 | 251 | let start = SteadyTime::now(); 252 | loop { 253 | // Create a CorrelationId so that the responses to our requests get sent back on the right channel 254 | let correlation_id = CorrelationId::pid(test_pid.clone()); 255 | 256 | // Send a ClusterStatus request to the cluster server on node1. 257 | nodes[0].cluster_status(correlation_id).unwrap(); 258 | 259 | // Poll on the test channel for a response. We should only get a ClusterStatus response 260 | let _ = poller.wait(5000).unwrap(); 261 | let envelope = test_rx.try_recv().unwrap(); 262 | 263 | // Match on the msg and see if both backups are currently connected to node1 264 | if let Msg::ClusterStatus(ClusterStatus{connected, ..}) = envelope.msg { 265 | if connected.len() == 2 { 266 | println!("{:#?}", connected); 267 | println!("Cluster connected in {} ms", (SteadyTime::now() - start).num_milliseconds()); 268 | break; 269 | } 270 | } 271 | } 272 | ``` 273 | 274 | # Creating an API Service 275 | Now we have 3 nodes up, with a counter process on each one. We hacked our way through the cluster 276 | setup, but now we want to learn how to build a service so that we can present both admin and API 277 | servers to network clients. Since we've already joined the nodes, we'll focus on building an API 278 | server here. All services must implement the [ServiceHandler 279 | trait](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/service_handler.rs).
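Before wiring up the handler, it helps to see what the wire format looks like. The sketch below is illustrative only: it frames a MsgPack-encoded value with a 4-byte big-endian length prefix using the same rmp-serde `Serializer` style used elsewhere in the codebase; the `frame` helper itself is hypothetical and not part of rabble.

```Rust
extern crate rmp_serde;
extern crate serde;

use serde::Serialize;

// Frame a MsgPack-encoded value with a 4-byte big-endian length prefix.
// Hypothetical helper for illustration; rabble's TcpServerHandler does its own framing.
fn frame<T: Serialize>(value: &T) -> Result<Vec<u8>, rmp_serde::encode::Error> {
    let mut encoded = Vec::new();
    value.serialize(&mut rmp_serde::Serializer::new(&mut encoded))?;
    let len = encoded.len() as u32;
    let mut framed = Vec::with_capacity(4 + encoded.len());
    framed.extend_from_slice(&[(len >> 24) as u8, (len >> 16) as u8, (len >> 8) as u8, len as u8]);
    framed.extend_from_slice(&encoded);
    Ok(framed)
}

fn main() {
    // Any Serialize type works; a tuple keeps the example dependency-free.
    let framed = frame(&("GetCount", 0u64)).unwrap();
    println!("{} bytes on the wire, payload length = {}", framed.len(), framed.len() - 4);
}
```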
280 | 281 | Our API service will use 4-byte framed, MsgPack-encoded messages over TCP and will use the 282 | already built 283 | [TcpServerHandler](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/tcp_server_handler.rs). 284 | This service isolates connections from each other and routes messages to the correct connection. 285 | Connection handlers themselves are user specified and can be customized for the specific 286 | application. Therefore, instead of writing a service handler directly, we will need to 287 | implement a 288 | [ConnectionHandler](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/connection_handler.rs). 289 | 290 | Each connection handler has two message types that must be defined. One is for the actors in the 291 | system, which is the `CounterMsg` we've been using in the rest of the example. The other is the 292 | message sent between the client and the API server. In almost every case these messages will differ, 293 | but for our purposes they can be the same message. 294 | 295 | There are 3 callback functions to implement for a ConnectionHandler. `new()` is called with 296 | the pid of the service running the service handler (which calls the connection handler), and the 297 | unique id of the connection for use in correlation ids. `handle_envelope()` is called when an actor 298 | message is sent to the connection handler. In general this occurs when a reply to a client request 299 | comes back to the handler. This reply is then bundled into the `ConnectionMsg::Client` variant and 300 | returned so it can be sent back on the client connection. `handle_network_msg()` gets called when a 301 | new message is received from the client. These requests are packed into Envelopes and returned as 302 | `ConnectionMsg::Envelope` variants so they can be routed to actors. 303 | 304 | ```Rust 305 | pub struct ApiServerConnectionHandler { 306 | pid: Pid, 307 | counter_pid: Pid, 308 | id: usize, 309 | total_requests: usize 310 | } 311 | 312 | impl ConnectionHandler for ApiServerConnectionHandler { 313 | type Msg = CounterMsg; 314 | type ClientMsg = CounterMsg; 315 | 316 | fn new(pid: Pid, id: usize) -> ApiServerConnectionHandler { 317 | let counter_pid = Pid { 318 | name: "replica1".to_string(), 319 | group: None, 320 | node: pid.node.clone() 321 | }; 322 | 323 | ApiServerConnectionHandler { 324 | pid: pid, 325 | counter_pid: counter_pid, 326 | id: id, 327 | total_requests: 0 328 | } 329 | } 330 | 331 | fn handle_envelope(&mut self, 332 | envelope: Envelope<CounterMsg>, 333 | output: &mut Vec<ConnectionMsg<ApiServerConnectionHandler>>) 334 | { 335 | let Envelope {msg, correlation_id, ..} = envelope; 336 | // Envelopes destined for a connection handler must have a correlation id 337 | let correlation_id = correlation_id.unwrap(); 338 | 339 | match msg { 340 | Msg::User(counter_msg) => 341 | output.push(ConnectionMsg::ClientMsg(counter_msg, correlation_id)), 342 | 343 | // Requests can time out as well. Our client message should contain a Timeout variant. 344 | Msg::Timeout => ..., 345 | 346 | _ => ... // ignore other messages for now 347 | } 348 | } 349 | 350 | fn handle_network_msg(&mut self, 351 | msg: CounterMsg, 352 | output: &mut Vec<ConnectionMsg<ApiServerConnectionHandler>>) 353 | { 354 | // Our client and actor messages are the same, so just forward to the counter process. 355 | // Note that in a real system, either the counter Pid would be passed in from the client, known 356 | // a-priori, or learned via an envelope in `handle_envelope`.
For now we just know it 357 | // a-priori. 358 | let msg = Msg::User(msg); 359 | let correlation_id = CorrelationId::request(self.pid.clone(), self.id, self.total_requests); 360 | self.total_requests += 1; 361 | let envelope = Envelope::new(self.counter_pid.clone(), self.pid.clone(), msg, Some(correlation_id)); 362 | output.push(ConnectionMsg::Envelope(envelope)); 363 | } 364 | ``` 365 | 366 | Now that we've created the connection handler for our API server, we need to give the service a Pid and start the server. 367 | 368 | ```Rust 369 | let server_pid = Pid { 370 | name: "api-server".to_string(), 371 | group: None, 372 | node: nodes[0].id.clone() 373 | }; 374 | 375 | // Create a TcpServerHandler (generic over the connection handler and the message serializer) that 376 | // listens on "127.0.0.1:11001", has a 5 second request timeout, and no connection timeout. 377 | let handler = 378 | TcpServerHandler::new(server_pid.clone(), "127.0.0.1:11001", 5000, None); 379 | let mut service = Service::new(server_pid, nodes[0].clone(), handler).unwrap(); 380 | 381 | // Services need to run in their own thread 382 | let h = thread::spawn(move || { 383 | service.wait(); 384 | }); 385 | ``` 386 | 387 | # Timers 388 | 389 | The guide so far has explained how to implement a system using rabble. It hit all of the major 390 | points. However, in assuming a bounded, reliable network, the example ignored worrying about lost or 391 | delayed messages. In reality, distributed systems must take this into account by setting a timer for 392 | each request. If the timer expires, then the user is alerted of the timeout. Whether the request 393 | succeeded or failed is indeterminate. This is an unfortunate fact of nature. Rabble allows users to 394 | add timers for all requests from within a process or service. (Note that the TcpServerHandler 395 | automatically manages request timeouts, so it is unnecessary to use this facility for that 396 | purpose.) 397 | 398 | Timers are tied to a given process and correlation id, and are declared in milliseconds. 399 | Currently the maximum timer length is 59 minutes, and the minimum timer resolution is 10ms. Timers 400 | under one second are rounded up to the next 10ms, timers of 1 second to 59 seconds are rounded up to 401 | the next second, and timers of 1 minute or more are rounded up to the next minute. This behavior 402 | is based on the hierarchical timer wheel implementation in 403 | [ferris](https://github.com/andrewjstone/ferris). 404 | 405 | Additionally, processes may want to return messages or set timers on startup. For this reason, there 406 | is an optional 407 | [init()](https://github.com/andrewjstone/rabble/blob/e1474eda584f3c278322ce21d33d56e6e30f639f/src/process.rs#L12-L14) 408 | callback that can be implemented for processes. The example below shows the implementation of a 409 | simple test process that starts a 100ms timer in `init()` by returning a message destined for 410 | the executor, and then gets a `Msg::Timeout` callback in `handle`. 411 | 412 | ```Rust 413 | struct TestProcess { 414 | pid: Pid, 415 | executor_pid: Option<Pid>, 416 | output: Vec<Envelope<()>> 417 | } 418 | 419 | impl Process for TestProcess { 420 | type Msg = (); 421 | 422 | fn init(&mut self, executor_pid: Pid) -> Vec<Envelope<()>> { 423 | self.executor_pid = Some(executor_pid); 424 | 425 | // Start a timer with a 100ms timeout and no correlation id. We don't need one since there is 426 | // only one timer in this example. In practice timers should almost always have CorrelationIds.
427 | vec![Envelope::new(self.executor_pid.as_ref().unwrap().clone(), 428 | self.pid.clone(), 429 | Msg::StartTimer(100), 430 | None)] 431 | } 432 | 433 | fn handle(&mut self, 434 | msg: Msg<()>, 435 | from: Pid, 436 | correlation_id: Option<CorrelationId>, 437 | output: &mut Vec<Envelope<()>>) 438 | { 439 | assert_eq!(from, *self.executor_pid.as_ref().unwrap()); 440 | assert_eq!(msg, Msg::Timeout); 441 | assert_eq!(correlation_id, None); 442 | } 443 | } 444 | ``` 445 | 446 | 447 | -------------------------------------------------------------------------------- /src/cluster/metrics.rs: -------------------------------------------------------------------------------- 1 | use metrics::{Metric, Metrics}; 2 | 3 | metrics!(ClusterMetrics { 4 | errors: u64, 5 | poll_notifications: u64, 6 | joins: u64, 7 | leaves: u64, 8 | received_local_envelopes: u64, 9 | received_remote_envelopes: u64, 10 | status_requests: u64, 11 | accepted_connections: u64, 12 | connection_attempts: u64 13 | }); 14 | -------------------------------------------------------------------------------- /src/cluster/mod.rs: -------------------------------------------------------------------------------- 1 | mod server; 2 | mod status; 3 | mod msg; 4 | mod metrics; 5 | 6 | pub use self::server::ClusterServer; 7 | pub use self::status::ClusterStatus; 8 | pub use self::msg::{ 9 | ClusterMsg, 10 | ExternalMsg 11 | }; 12 | pub use self::metrics::ClusterMetrics; 13 | -------------------------------------------------------------------------------- /src/cluster/msg.rs: -------------------------------------------------------------------------------- 1 | use amy::Notification; 2 | use orset::{ORSet, Delta}; 3 | use node_id::NodeId; 4 | use envelope::Envelope; 5 | use correlation_id::CorrelationId; 6 | 7 | /// Messages sent to the Cluster Server 8 | pub enum ClusterMsg<T> { 9 | PollNotifications(Vec<Notification>), 10 | Join(NodeId), 11 | Leave(NodeId), 12 | Envelope(Envelope<T>), 13 | GetStatus(CorrelationId), 14 | Shutdown 15 | } 16 | 17 | /// A message sent between nodes in Rabble.
18 | /// 19 | #[derive(Debug, Clone, Serialize, Deserialize)] 20 | pub enum ExternalMsg { 21 | Members {from: NodeId, orset: ORSet}, 22 | Ping, 23 | Envelope(Envelope), 24 | Delta(Delta) 25 | } 26 | -------------------------------------------------------------------------------- /src/cluster/server.rs: -------------------------------------------------------------------------------- 1 | use std::sync::mpsc::{self, Sender, Receiver}; 2 | use std::collections::{HashMap, HashSet}; 3 | use std::net::{TcpListener, TcpStream}; 4 | use std::fmt::Debug; 5 | use libc::EINPROGRESS; 6 | use net2::{TcpBuilder, TcpStreamExt}; 7 | use serde::{Serialize, Deserialize}; 8 | use msgpack::{Serializer, Deserializer}; 9 | use slog; 10 | use amy::{Registrar, Notification, Event, FrameReader, FrameWriter}; 11 | use members::Members; 12 | use node_id::NodeId; 13 | use msg::Msg; 14 | use executor::ExecutorMsg; 15 | use timer_wheel::TimerWheel; 16 | use envelope::Envelope; 17 | use orset::{ORSet, Delta}; 18 | use pid::Pid; 19 | use correlation_id::CorrelationId; 20 | use errors::*; 21 | use metrics::Metrics; 22 | use super::{ClusterStatus, ClusterMsg, ExternalMsg, ClusterMetrics}; 23 | 24 | // TODO: This is totally arbitrary right now and should probably be user configurable 25 | const MAX_FRAME_SIZE: u32 = 100*1024*1024; // 100 MB 26 | const TICK_TIME: usize = 1000; // milliseconds 27 | const REQUEST_TIMEOUT: usize = 5000; // milliseconds 28 | 29 | // This tick allows process specific timers to fire 30 | const EXECUTOR_TICK_TIME: usize = 100; // milliseconds 31 | 32 | struct Conn { 33 | sock: TcpStream, 34 | node: Option, 35 | is_client: bool, 36 | members_sent: bool, 37 | timer_wheel_index: usize, 38 | reader: FrameReader, 39 | writer: FrameWriter 40 | } 41 | 42 | impl Conn { 43 | pub fn new(sock: TcpStream, node: Option, is_client: bool) -> Conn { 44 | Conn { 45 | sock: sock, 46 | node: node, 47 | is_client: is_client, 48 | members_sent: false, 49 | timer_wheel_index: 0, // Initialize with a fake value 50 | reader: FrameReader::new(MAX_FRAME_SIZE), 51 | writer: FrameWriter::new(), 52 | } 53 | } 54 | } 55 | 56 | /// A struct that handles cluster membership connection and routing of messages to processes on 57 | /// other nodes. 
58 | pub struct ClusterServer { 59 | pid: Pid, 60 | node: NodeId, 61 | rx: Receiver>, 62 | executor_tx: Sender>, 63 | executor_timer_id: usize, 64 | timer_id: usize, 65 | timer_wheel: TimerWheel, 66 | listener: TcpListener, 67 | listener_id: usize, 68 | members: Members, 69 | connections: HashMap, 70 | established: HashMap, 71 | registrar: Registrar, 72 | logger: slog::Logger, 73 | metrics: ClusterMetrics 74 | } 75 | 76 | impl<'de, T: Serialize + Deserialize<'de> + Debug + Clone> ClusterServer { 77 | pub fn new(node: NodeId, 78 | rx: Receiver>, 79 | executor_tx: Sender>, 80 | registrar: Registrar, 81 | logger: slog::Logger) -> ClusterServer { 82 | let pid = Pid { 83 | group: Some("rabble".to_string()), 84 | name: "cluster_server".to_string(), 85 | node: node.clone() 86 | }; 87 | let listener = TcpListener::bind(&node.addr[..]).unwrap(); 88 | listener.set_nonblocking(true).unwrap(); 89 | ClusterServer { 90 | pid: pid, 91 | node: node.clone(), 92 | rx: rx, 93 | executor_tx: executor_tx, 94 | executor_timer_id: 0, 95 | timer_id: 0, 96 | timer_wheel: TimerWheel::new(REQUEST_TIMEOUT / TICK_TIME), 97 | listener: listener, 98 | listener_id: 0, 99 | members: Members::new(node), 100 | connections: HashMap::new(), 101 | established: HashMap::new(), 102 | registrar: registrar, 103 | logger: logger.new(o!("component" => "cluster_server")), 104 | metrics: ClusterMetrics::new() 105 | } 106 | } 107 | 108 | pub fn run(mut self) { 109 | info!(self.logger, "Starting"); 110 | self.timer_id = self.registrar.set_interval(TICK_TIME).unwrap(); 111 | self.executor_timer_id = self.registrar.set_interval(EXECUTOR_TICK_TIME).unwrap(); 112 | self.listener_id = self.registrar.register(&self.listener, Event::Read).unwrap(); 113 | while let Ok(msg) = self.rx.recv() { 114 | if let Err(e) = self.handle_cluster_msg(msg) { 115 | self.metrics.errors += 1; 116 | for id in e.kind().get_ids() { 117 | self.close(id) 118 | } 119 | match *e.kind() { 120 | ErrorKind::EncodeError(..) | ErrorKind::DecodeError(..) | 121 | ErrorKind::RegistrarError(..) | ErrorKind::SendError(..) => { 122 | error!(self.logger, e.to_string()); 123 | break; 124 | } 125 | 126 | ErrorKind::Shutdown(..) 
=> { 127 | info!(self.logger, e.to_string()); 128 | break; 129 | }, 130 | 131 | _ => warn!(self.logger, e.to_string()) 132 | } 133 | } 134 | } 135 | } 136 | 137 | fn handle_cluster_msg(&mut self, msg: ClusterMsg) -> Result<()> { 138 | match msg { 139 | ClusterMsg::PollNotifications(notifications) => { 140 | self.metrics.poll_notifications += 1; 141 | self.handle_poll_notifications(notifications) 142 | }, 143 | ClusterMsg::Join(node) => { 144 | self.metrics.joins += 1; 145 | self.join(node) 146 | }, 147 | ClusterMsg::Leave(node) => { 148 | self.metrics.leaves += 1; 149 | self.leave(node) 150 | }, 151 | ClusterMsg::Envelope(envelope) => { 152 | self.metrics.received_local_envelopes += 1; 153 | // Only metric requests are directly sent to the cluster server 154 | if envelope.to == self.pid { 155 | self.send_metrics(envelope); 156 | return Ok(()); 157 | } 158 | self.send_remote(envelope) 159 | }, 160 | ClusterMsg::GetStatus(correlation_id) => { 161 | self.metrics.status_requests += 1; 162 | self.get_status(correlation_id) 163 | }, 164 | ClusterMsg::Shutdown => Err(ErrorKind::Shutdown(self.pid.clone()).into()) 165 | } 166 | } 167 | 168 | fn get_status(&self, correlation_id: CorrelationId) -> Result<()> { 169 | let status = ClusterStatus { 170 | members: self.members.all(), 171 | established: self.established.keys().cloned().collect(), 172 | num_connections: self.connections.len() 173 | }; 174 | let envelope = Envelope { 175 | to: correlation_id.pid.clone(), 176 | from: self.pid.clone(), 177 | msg: Msg::ClusterStatus(status), 178 | correlation_id: Some(correlation_id) 179 | }; 180 | // Route the response through the executor since it knows how to contact all Pids 181 | if let Err(mpsc::SendError(ExecutorMsg::Envelope(envelope))) = 182 | self.executor_tx.send(ExecutorMsg::Envelope(envelope)) 183 | { 184 | return Err(ErrorKind::SendError("ExecutorMsg::Envelope".to_string(), 185 | Some(envelope.to)).into()); 186 | } 187 | Ok(()) 188 | } 189 | 190 | fn send_remote(&mut self, envelope: Envelope) -> Result<()> { 191 | if let Some(id) = self.established.get(&envelope.to.node).cloned() { 192 | trace!(self.logger, "send remote"; "to" => envelope.to.to_string()); 193 | let mut encoded = Vec::new(); 194 | let node = envelope.to.node.clone(); 195 | try!(ExternalMsg::Envelope(envelope).serialize(&mut Serializer::new(&mut encoded)) 196 | .chain_err(|| ErrorKind::EncodeError(Some(id), Some(node)))); 197 | try!(self.write(id, Some(encoded))); 198 | } 199 | Ok(()) 200 | } 201 | 202 | fn handle_poll_notifications(&mut self, notifications: Vec) -> Result<()> { 203 | trace!(self.logger, "handle_poll_notification"; "num_notifications" => notifications.len()); 204 | let mut errors = Vec::new(); 205 | for n in notifications { 206 | let result = match n.id { 207 | id if id == self.listener_id => self.accept_connection(), 208 | id if id == self.timer_id => self.tick(), 209 | id if id == self.executor_timer_id => self.tick_executor(), 210 | _ => self.do_socket_io(n) 211 | }; 212 | 213 | if let Err(e) = result { 214 | errors.push(e); 215 | } 216 | } 217 | if errors.len() != 0 { 218 | return Err(ErrorKind::PollNotificationErrors(errors).into()); 219 | } 220 | Ok(()) 221 | } 222 | 223 | fn do_socket_io(&mut self, notification: Notification) -> Result<()> { 224 | match notification.event { 225 | Event::Read => self.read(notification.id), 226 | Event::Write => self.write(notification.id, None), 227 | Event::Both => { 228 | try!(self.read(notification.id)); 229 | self.write(notification.id, None) 230 | } 231 | } 232 | } 233 | 
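    // Example (editor's illustrative sketch, not part of this impl): how a caller retrieves the
    // status that `get_status()` above assembles. A service sends `ClusterMsg::GetStatus` via
    // `Node::cluster_status` with a CorrelationId naming its own pid, and the reply is routed
    // back through the executor as a `Msg::ClusterStatus` envelope. `service_pid` and `node`
    // are assumed to come from the caller's own setup:
    //
    //     let c_id = CorrelationId::pid(service_pid.clone());
    //     node.cluster_status(c_id)?;                      // -> ClusterMsg::GetStatus
    //     // ... later, in the service's envelope handler:
    //     if let Msg::ClusterStatus(status) = envelope.msg {
    //         println!("members: {:?}, connections: {}", status.members, status.num_connections);
    //     }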
234 | /// Returns `Some(true)` if there is such a connection and the members were already sent. 235 | /// Returns `Some(false)` if there is such a connection and the members were NOT sent. 236 | /// Returns None if there is no such connection. 237 | fn members_sent(&self, id: usize) -> Option<bool> { 238 | if let Some(conn) = self.connections.get(&id) { 239 | return Some(conn.members_sent); 240 | } 241 | None 242 | } 243 | 244 | fn read(&mut self, id: usize) -> Result<()> { 245 | trace!(self.logger, "read"; "id" => id); 246 | match self.members_sent(id) { 247 | Some(false) => try!(self.send_members(id)), 248 | None => (), 249 | Some(true) => { 250 | let messages = try!(self.decode_messages(id)); 251 | for msg in messages { 252 | try!(self.handle_decoded_message(id, msg)); 253 | } 254 | } 255 | } 256 | Ok(()) 257 | } 258 | 259 | fn handle_decoded_message(&mut self, id: usize, msg: ExternalMsg<T>) -> Result<()> { 260 | match msg { 261 | ExternalMsg::Members{from, orset} => { 262 | info!(self.logger, "Got Members"; "id" => id, "from" => from.to_string()); 263 | self.establish_connection(id, from, orset); 264 | self.check_connections(); 265 | }, 266 | ExternalMsg::Ping => { 267 | trace!(self.logger, "Got Ping"; "id" => id); 268 | self.reset_timer(id); 269 | } 270 | ExternalMsg::Envelope(envelope) => { 271 | self.metrics.received_remote_envelopes += 1; 272 | debug!(self.logger, "Got User Message"; 273 | "from" => envelope.from.to_string(), 274 | "to" => envelope.to.to_string()); 275 | if let Err(mpsc::SendError(ExecutorMsg::Envelope(envelope))) 276 | = self.executor_tx.send(ExecutorMsg::Envelope(envelope)) 277 | { 278 | return Err(ErrorKind::SendError("ExecutorMsg::Envelope".to_string(), 279 | Some(envelope.to)).into()); 280 | } 281 | }, 282 | ExternalMsg::Delta(delta) => { 283 | debug!(self.logger, "Got Delta mutator"; 284 | "id" => id, "delta" => format!("{:?}", delta)); 285 | if self.members.join_delta(delta.clone()) { 286 | try!(self.broadcast_delta(delta)); 287 | } 288 | } 289 | } 290 | Ok(()) 291 | } 292 | 293 | fn write(&mut self, id: usize, msg: Option<Vec<u8>>) -> Result<()> { 294 | trace!(self.logger, "write"; "id" => id); 295 | let registrar = &self.registrar; 296 | if let Some(mut conn) = self.connections.get_mut(&id) { 297 | if msg.is_none() { 298 | if conn.writer.is_writable() { 299 | // The socket has just become writable. We need to re-register it as only 300 | // readable, or the event will keep firing indefinitely even if there is 301 | // no data to write. 302 | try!(registrar.reregister(id, &conn.sock, Event::Read) 303 | .chain_err(|| ErrorKind::RegistrarError(Some(id), conn.node.clone()))); 304 | } 305 | 306 | // We just got an Event::Write from the poller 307 | conn.writer.writable(); 308 | } 309 | try!(conn_write(id, &mut conn, msg, &registrar)); 310 | } 311 | Ok(()) 312 | } 313 | 314 | fn reset_timer(&mut self, id: usize) { 315 | if let Some(conn) = self.connections.get_mut(&id) { 316 | self.timer_wheel.remove(&id, conn.timer_wheel_index); 317 | conn.timer_wheel_index = self.timer_wheel.insert(id) 318 | } 319 | } 320 | 321 | /// Transition a connection from unestablished to established. If there is already an 322 | /// established connection between these two nodes, determine which one should be closed. 323 | fn establish_connection(&mut self, id: usize, from: NodeId, orset: ORSet<NodeId>) { 324 | self.members.join(orset); 325 | if let Some(close_id) = self.choose_connection_to_close(id, &from) { 326 | debug!(self.logger, 327 | "Two connections between nodes. 
Closing the connection where \ 328 | the peer that sorts lower was the connecting client"; 329 | "peer" => from.to_string(), "id" => close_id); 330 | self.close(close_id); 331 | if close_id == id { 332 | return; 333 | } 334 | } 335 | debug!(self.logger, "Trying to establish connection"; "peer" => from.to_string(), "id" => id); 336 | if let Some(conn) = self.connections.get_mut(&id) { 337 | info!(self.logger, "Establish connection"; "peer" => from.to_string(), "id" => id); 338 | conn.node = Some(from.clone()); 339 | self.timer_wheel.remove(&id, conn.timer_wheel_index); 340 | conn.timer_wheel_index = self.timer_wheel.insert(id); 341 | self.established.insert(from, id); 342 | } 343 | } 344 | 345 | /// We only want a single connection between nodes. Choose the connection where the client side 346 | /// comes from a node that sorts less than the node of the server side of the connection. 347 | /// Return the id to remove if there is an existing connection to remove, otherwise return 348 | /// `None` indicating that there isn't an existing connection, so don't close the new one. 349 | fn choose_connection_to_close(&self, id: usize, from: &NodeId) -> Option { 350 | if let Some(saved_id) = self.established.get(from) { 351 | if let Some(saved_conn) = self.connections.get(&saved_id) { 352 | // A client connection always comes from self.node 353 | if (saved_conn.is_client && self.node < *from) || 354 | (!saved_conn.is_client && *from < self.node) { 355 | return Some(*saved_id); 356 | } else { 357 | return Some(id); 358 | } 359 | } 360 | } 361 | None 362 | } 363 | 364 | fn decode_messages(&mut self, id: usize) -> Result>> { 365 | let mut output = Vec::new(); 366 | if let Some(conn) = self.connections.get_mut(&id) { 367 | let node = conn.node.clone(); 368 | try!(conn.reader.read(&mut conn.sock) 369 | .chain_err(|| ErrorKind::ReadError(id, node.clone()))); 370 | 371 | for frame in conn.reader.iter_mut() { 372 | let mut decoder = Deserializer::new(&frame[..]); 373 | let msg = try!(Deserialize::deserialize(&mut decoder) 374 | .chain_err(|| ErrorKind::DecodeError(id, node.clone()))); 375 | output.push(msg); 376 | } 377 | } 378 | Ok(output) 379 | } 380 | 381 | fn join(&mut self, node: NodeId) -> Result<()> { 382 | let delta = self.members.add(node.clone()); 383 | try!(self.broadcast_delta(delta)); 384 | self.metrics.connection_attempts += 1; 385 | self.connect(node) 386 | } 387 | 388 | fn leave(&mut self, node: NodeId) -> Result<()> { 389 | if let Some(delta) = self.members.leave(node.clone()) { 390 | try!(self.broadcast_delta(delta)); 391 | } 392 | Ok(()) 393 | } 394 | 395 | fn connect(&mut self, node: NodeId) -> Result<()> { 396 | debug!(self.logger, "connect"; "to" => node.to_string()); 397 | let sock = try!(TcpBuilder::new_v4().chain_err(|| "Failed to create a IPv4 socket")); 398 | let sock = try!(sock.to_tcp_stream().chain_err(|| "Failed to create TcpStream")); 399 | try!(sock.set_nonblocking(true).chain_err(|| "Failed to make socket nonblocking")); 400 | if let Err(e) = sock.connect(&node.addr[..]) { 401 | if e.raw_os_error().is_some() && *e.raw_os_error().as_ref().unwrap() != EINPROGRESS { 402 | return Err(e).chain_err(|| ErrorKind::ConnectError(node)); 403 | } 404 | } 405 | try!(self.init_connection(sock, Some(node))); 406 | Ok(()) 407 | } 408 | 409 | fn accept_connection(&mut self) -> Result<()> { 410 | while let Ok((sock, _)) = self.listener.accept() { 411 | self.metrics.accepted_connections += 1; 412 | debug!(self.logger, "accepted connection"); 413 | try!(sock.set_nonblocking(true).chain_err(|| 
"Failed to make socket nonblocking")); 414 | let id = try!(self.init_connection(sock, None)); 415 | try!(self.send_members(id)); 416 | } 417 | Ok(()) 418 | } 419 | 420 | fn init_connection(&mut self, sock: TcpStream, node: Option) -> Result { 421 | let id = try!(self.registrar.register(&sock, Event::Read) 422 | .chain_err(|| ErrorKind::RegistrarError(None, None))); 423 | debug!(self.logger, "init_connection()"; 424 | "id" => id, "is_client" => node.is_some(), "peer" => format!("{:?}", node)); 425 | let is_client = node.is_some(); 426 | let mut conn = Conn::new(sock, node, is_client); 427 | conn.timer_wheel_index = self.timer_wheel.insert(id); 428 | self.connections.insert(id, conn); 429 | Ok(id) 430 | } 431 | 432 | fn send_members(&mut self, id: usize) -> Result<()> { 433 | let encoded = try!(self.encode_members(id)); 434 | let registrar = &self.registrar; 435 | if let Some(mut conn) = self.connections.get_mut(&id) { 436 | info!(self.logger, "Send members"; "id" => id); 437 | try!(conn_write(id, &mut conn, Some(encoded), ®istrar)); 438 | conn.members_sent = true; 439 | } 440 | Ok(()) 441 | } 442 | 443 | fn tick(&mut self) -> Result<()> { 444 | trace!(self.logger, "tick"); 445 | let expired = self.timer_wheel.expire(); 446 | self.deregister(expired); 447 | try!(self.broadcast_pings()); 448 | self.check_connections(); 449 | Ok(()) 450 | } 451 | 452 | fn tick_executor(&mut self) -> Result<()> { 453 | trace!(self.logger, "tick_executor"); 454 | // Panic if the executor is down. 455 | self.executor_tx.send(ExecutorMsg::Tick).unwrap() ; 456 | Ok(()) 457 | } 458 | 459 | fn encode_members(&self, id: usize) -> Result> { 460 | let orset = self.members.get_orset(); 461 | let mut encoded = Vec::new(); 462 | let msg = ExternalMsg::Members:: {from: self.node.clone(), orset: orset}; 463 | try!(msg.serialize(&mut Serializer::new(&mut encoded)) 464 | .chain_err(|| ErrorKind::EncodeError(Some(id), None))); 465 | Ok(encoded) 466 | } 467 | 468 | fn deregister(&mut self, expired: HashSet) { 469 | for id in expired.iter() { 470 | warn!(self.logger, "Connection timeout"; "id" => *id); 471 | self.close(*id); 472 | } 473 | } 474 | 475 | /// Close an existing connection and remove all related state. 
476 | fn close(&mut self, id: usize) { 477 | if let Some(conn) = self.connections.remove(&id) { 478 | let _ = self.registrar.deregister(conn.sock); 479 | self.timer_wheel.remove(&id, conn.timer_wheel_index); 480 | if let Some(node) = conn.node { 481 | // Remove established connection if it matches this id 482 | if let Some(established_id) = self.established.remove(&node) { 483 | if established_id == id { 484 | info!(self.logger, "Closing established connection"; 485 | "id" => id,"peer" => node.to_string()); 486 | return; 487 | } 488 | // The established node didn't correspond to this id, so put it back 489 | self.established.insert(node, established_id); 490 | } 491 | } 492 | info!(self.logger, "Closing unestablished connection"; "id" => id); 493 | } 494 | } 495 | 496 | fn broadcast_delta(&mut self, delta: Delta) -> Result<()> { 497 | debug!(self.logger, "Broadcasting delta"; "delta" => format!("{:?}", delta)); 498 | let mut encoded = Vec::new(); 499 | let msg = ExternalMsg::Delta::(delta); 500 | try!(msg.serialize(&mut Serializer::new(&mut encoded)) 501 | .chain_err(|| ErrorKind::EncodeError(None, None))); 502 | self.broadcast(encoded) 503 | } 504 | 505 | fn broadcast_pings(&mut self) -> Result<()> { 506 | let mut encoded = Vec::new(); 507 | let msg = ExternalMsg::Ping::; 508 | try!(msg.serialize(&mut Serializer::new(&mut encoded)) 509 | .chain_err(|| ErrorKind::EncodeError(None, None))); 510 | self.broadcast(encoded) 511 | } 512 | 513 | // Write encoded values to all connections and return the id of any connections with errors 514 | fn broadcast(&mut self, encoded: Vec) -> Result<()> { 515 | let mut errors = Vec::new(); 516 | let registrar = &self.registrar; 517 | for (id, mut conn) in self.connections.iter_mut() { 518 | if !conn.members_sent { 519 | // This connection isn't connected yet 520 | continue; 521 | } 522 | if let Err(e) = conn_write(*id, &mut conn, Some(encoded.clone()), ®istrar) { 523 | errors.push(e) 524 | } 525 | } 526 | if errors.len() != 0 { 527 | return Err(ErrorKind::BroadcastError(errors).into()); 528 | } 529 | Ok(()) 530 | } 531 | 532 | // Ensure connections are correct based on membership state 533 | fn check_connections(&mut self) { 534 | let all = self.members.all(); 535 | 536 | // If this node is no longer a member of the cluster disconnect from all nodes 537 | if !all.contains(&self.node) { 538 | return self.disconnect_all(); 539 | } 540 | 541 | // Pending, Client connected, or established server side connections 542 | let known_peer_conns: HashSet = 543 | self.connections.iter().filter_map(|(_, conn)| conn.node.clone()).collect(); 544 | 545 | let to_connect: Vec = all.difference(&known_peer_conns) 546 | .filter(|&node| *node != self.node).cloned().collect(); 547 | 548 | let to_disconnect: Vec = known_peer_conns.difference(&all).cloned().collect(); 549 | 550 | trace!(self.logger, "check_connections"; 551 | "to_connect" => format!("{:?}", to_connect), 552 | "to_disconnect" => format!("{:?}", to_disconnect)); 553 | 554 | for node in to_connect { 555 | self.metrics.connection_attempts += 1; 556 | if let Err(e) = self.connect(node) { 557 | warn!(self.logger, e.to_string()); 558 | } 559 | } 560 | 561 | self.disconnect_established(to_disconnect); 562 | } 563 | 564 | fn disconnect_all(&mut self) { 565 | self.established = HashMap::new(); 566 | for (id, conn) in self.connections.drain() { 567 | self.timer_wheel.remove(&id, conn.timer_wheel_index); 568 | if let Err(e) = self.registrar.deregister(conn.sock) { 569 | error!(self.logger, "Failed to deregister socket"; 570 | 
"id" => id, "peer" => format!("{:?}", conn.node), 571 | "error" => e.to_string()); 572 | } 573 | } 574 | } 575 | 576 | fn disconnect_established(&mut self, to_disconnect: Vec) { 577 | for node in to_disconnect { 578 | if let Some(id) = self.established.remove(&node) { 579 | let conn = self.connections.remove(&id).unwrap(); 580 | self.timer_wheel.remove(&id, conn.timer_wheel_index); 581 | if let Err(e) = self.registrar.deregister(conn.sock) { 582 | error!(self.logger, "Failed to deregister socket"; 583 | "id" => id, "peer" => conn.node.unwrap().to_string(), 584 | "error" => e.to_string()); 585 | } 586 | } 587 | } 588 | } 589 | 590 | fn send_metrics(&mut self, envelope: Envelope) { 591 | if let Msg::GetMetrics = envelope.msg { 592 | let new_envelope = Envelope { 593 | to: envelope.from, 594 | from: self.pid.clone(), 595 | msg: Msg::Metrics(self.metrics.data()), 596 | correlation_id: envelope.correlation_id 597 | }; 598 | // Route the response through the executor since it knows how to contact all Pids 599 | if let Err(mpsc::SendError(ExecutorMsg::Envelope(new_envelope))) = 600 | self.executor_tx.send(ExecutorMsg::Envelope(new_envelope)) 601 | { 602 | error!(self.logger, "Failed to send to executor"; 603 | "envelope" => format!("{:?}", new_envelope)); 604 | } 605 | } else { 606 | error!(self.logger, "Received Unknown Msg"; 607 | "envelope" => format!("{:?}", envelope)); 608 | } 609 | } 610 | } 611 | 612 | fn conn_write(id: usize, 613 | conn: &mut Conn, 614 | msg: Option>, 615 | registrar: &Registrar) -> Result<()> 616 | { 617 | let writable = try!(conn.writer.write(&mut conn.sock, msg).chain_err(|| { 618 | ErrorKind::WriteError(id, conn.node.clone()) 619 | })); 620 | if !writable { 621 | return registrar.reregister(id, &conn.sock, Event::Both) 622 | .chain_err(|| ErrorKind::RegistrarError(Some(id), conn.node.clone())); 623 | } 624 | Ok(()) 625 | } 626 | 627 | -------------------------------------------------------------------------------- /src/cluster/status.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use node_id::NodeId; 3 | 4 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 5 | pub struct ClusterStatus { 6 | pub members: HashSet, 7 | pub established: HashSet, 8 | pub num_connections: usize 9 | } 10 | -------------------------------------------------------------------------------- /src/correlation_id.rs: -------------------------------------------------------------------------------- 1 | use pid::Pid; 2 | 3 | /// Match requests through the system with their handlers 4 | /// 5 | /// All correlation ids must have a pid. 6 | /// Sometimes individual connections/requests aren't tracked so that field is optional. 
7 | #[derive(Debug, Hash, Clone, Eq, PartialEq, Serialize, Deserialize)] 8 | pub struct CorrelationId { 9 | pub pid: Pid, 10 | pub connection: Option, 11 | pub request: Option 12 | } 13 | 14 | impl CorrelationId { 15 | 16 | pub fn pid(pid: Pid) -> CorrelationId { 17 | CorrelationId { 18 | pid: pid, 19 | connection: None, 20 | request: None, 21 | } 22 | } 23 | 24 | /// Create a correlation id that matches a handler and connection 25 | pub fn connection(pid: Pid, connection_id: u64) -> CorrelationId { 26 | CorrelationId { 27 | pid: pid, 28 | connection: Some(connection_id), 29 | request: None 30 | } 31 | } 32 | 33 | /// Create a correlation id that matches a handler, connection, and request 34 | pub fn request(pid: Pid, connection_id: u64, request_id: u64) -> CorrelationId { 35 | CorrelationId { 36 | pid: pid, 37 | connection: Some(connection_id), 38 | request: Some(request_id) 39 | } 40 | } 41 | 42 | /// Clone the CorrelationId and increment the request counter 43 | pub fn next_request(&self) -> CorrelationId { 44 | let mut id = self.clone(); 45 | id.request = id.request.map(|req| req + 1); 46 | id 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/envelope.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use serde::{Serialize, Deserialize}; 3 | use pid::Pid; 4 | use correlation_id::CorrelationId; 5 | use msg::Msg; 6 | 7 | /// Envelopes are routable to processes on all nodes and threads running on the same node as this 8 | /// process. 9 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 10 | pub struct Envelope { 11 | pub to: Pid, 12 | pub from: Pid, 13 | pub msg: Msg, 14 | pub correlation_id: Option 15 | } 16 | 17 | impl<'de, T: Serialize + Deserialize<'de> + Debug + Clone> Envelope { 18 | pub fn new(to: Pid, from: Pid, msg: Msg, c_id: Option) -> Envelope { 19 | Envelope { 20 | to: to, 21 | from: from, 22 | msg: msg, 23 | correlation_id: c_id 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use msgpack; 3 | use protobuf; 4 | use pid::Pid; 5 | use node_id::NodeId; 6 | 7 | /// Used by the error-chain crate to generate errors 8 | error_chain! 
{ 9 | foreign_links { 10 | io::Error, Io; 11 | msgpack::encode::Error, MsgpackEncode; 12 | msgpack::decode::Error, MsgpackDecode; 13 | protobuf::error::ProtobufError, Protobuf; 14 | } 15 | 16 | errors { 17 | EncodeError(id: Option, to: Option) { 18 | description("Failed to encode message") 19 | display("Failed to encode message to {:?}, id={:?}", to, id) 20 | } 21 | DecodeError(id: usize, from: Option) { 22 | description("Failed to decode message") 23 | display("Failed to decode message from {:?}, id={}", from, id) 24 | } 25 | RegistrarError(id: Option, node: Option) { 26 | description("Failed to register/deregister/reregister socket") 27 | display("Failed to register/deregister/reregister socket: id={:?}, peer={:?}", id, node) 28 | } 29 | WriteError(id: usize, node: Option) { 30 | description("Failed to write to socket") 31 | display("Failed to write to socket: id={}, peer={:?}", id, node) 32 | } 33 | ReadError(id: usize, node: Option) { 34 | description("Failed to read from socket") 35 | display("Failed to read from socket: id={}, peer={:?}", id, node) 36 | } 37 | BroadcastError(errors: Vec) { 38 | description("Failed to broadcast") 39 | display("Failed to broadcast: errors = {:?}", errors) 40 | } 41 | PollNotificationErrors(errors: Vec) { 42 | description("Failed to process poll notifications") 43 | display("Failed to process poll notifications: errors = {:?}", errors) 44 | } 45 | ConnectError(node: NodeId) { 46 | description("Failed to connect") 47 | display("Failed to connect to {}", node) 48 | } 49 | SendError(msg: String, pid: Option) { 50 | description("Failed to send") 51 | display("Failed to send {} to {:?}", msg, pid) 52 | } 53 | Shutdown(pid: Pid) { 54 | description("Shutting down") 55 | display("Shutting down {}", pid) 56 | } 57 | } 58 | } 59 | 60 | impl ErrorKind { 61 | /// Return the socket ids of the error if there are any 62 | pub fn get_ids(&self) -> Vec { 63 | match *self { 64 | ErrorKind::EncodeError(id, _) => id.map_or(vec![], |id| vec![id]), 65 | ErrorKind::DecodeError(id, _) => vec![id], 66 | ErrorKind::RegistrarError(id, _) => id.map_or(vec![], |id| vec![id]), 67 | ErrorKind::WriteError(id, _) => vec![id], 68 | ErrorKind::ReadError(id, _) => vec![id], 69 | ErrorKind::BroadcastError(ref errors) => 70 | errors.iter().flat_map(|e| e.kind().get_ids()).collect(), 71 | ErrorKind::PollNotificationErrors(ref errors) => 72 | errors.iter().flat_map(|e| e.kind().get_ids()).collect(), 73 | 74 | _ => vec![] 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/executor/executor.rs: -------------------------------------------------------------------------------- 1 | use serde::{Serialize, Deserialize}; 2 | use std::mem; 3 | use std::fmt::Debug; 4 | use std::sync::mpsc::{Sender, Receiver}; 5 | use std::collections::HashMap; 6 | use amy; 7 | use slog; 8 | use time::Duration; 9 | use ferris::{Wheel, CopyWheel, Resolution}; 10 | use envelope::Envelope; 11 | use pid::Pid; 12 | use process::Process; 13 | use node_id::NodeId; 14 | use msg::Msg; 15 | use cluster::ClusterMsg; 16 | use correlation_id::CorrelationId; 17 | use metrics::Metrics; 18 | use super::{ExecutorStatus, ExecutorMetrics, ExecutorMsg}; 19 | 20 | pub struct Executor { 21 | pid: Pid, 22 | node: NodeId, 23 | envelopes: Vec>, 24 | processes: HashMap>>, 25 | service_senders: HashMap>>, 26 | tx: Sender>, 27 | rx: Receiver>, 28 | cluster_tx: Sender>, 29 | timer_wheel: CopyWheel<(Pid, Option)>, 30 | logger: slog::Logger, 31 | metrics: ExecutorMetrics 32 | } 33 | 34 | 
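// Illustrative sketch (editor's example, not part of the source): the timer protocol that
// `handle_executor_envelope` below implements. A process asks the executor for a timeout by
// pushing an envelope addressed to the executor's pid (handed to the process in `init()`), and
// later gets `Msg::Timeout` back with the same correlation id once the timer wheel expires it.
// The pid and correlation id values here are assumptions for the example:
//
//     // inside a Process::handle implementation
//     output.push(Envelope::new(executor_pid.clone(),        // to: the executor
//                               self.pid.clone(),             // from: this process
//                               Msg::StartTimer(500),         // fire in ~500 ms
//                               Some(correlation_id.clone())));
//     // ... on expiry, handle() is called with Msg::Timeout and the same correlation id;
//     // sending Msg::CancelTimer(Some(correlation_id)) first stops the timer instead.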
impl<'de, T: Serialize + Deserialize<'de> + Send + Debug + Clone> Executor { 35 | pub fn new(node: NodeId, 36 | tx: Sender>, 37 | rx: Receiver>, 38 | cluster_tx: Sender>, 39 | logger: slog::Logger) -> Executor { 40 | let pid = Pid { 41 | group: Some("rabble".to_string()), 42 | name: "executor".to_string(), 43 | node: node.clone() 44 | }; 45 | Executor { 46 | pid: pid, 47 | node: node, 48 | envelopes: Vec::new(), 49 | processes: HashMap::new(), 50 | service_senders: HashMap::new(), 51 | tx: tx, 52 | rx: rx, 53 | cluster_tx: cluster_tx, 54 | timer_wheel: CopyWheel::new(vec![Resolution::TenMs, Resolution::Sec, Resolution::Min]), 55 | logger: logger.new(o!("component" => "executor")), 56 | metrics: ExecutorMetrics::new() 57 | } 58 | } 59 | 60 | /// Run the executor 61 | /// 62 | ///This call blocks the current thread indefinitely. 63 | pub fn run(mut self) { 64 | while let Ok(msg) = self.rx.recv() { 65 | match msg { 66 | ExecutorMsg::Envelope(envelope) => { 67 | self.metrics.received_envelopes += 1; 68 | self.route(envelope); 69 | }, 70 | ExecutorMsg::Start(pid, process) => self.start(pid, process), 71 | ExecutorMsg::Stop(pid) => self.stop(pid), 72 | ExecutorMsg::RegisterService(pid, tx) => { 73 | self.service_senders.insert(pid, tx); 74 | }, 75 | ExecutorMsg::GetStatus(correlation_id) => self.get_status(correlation_id), 76 | ExecutorMsg::Tick => self.tick(), 77 | 78 | // Just return so the thread exits 79 | ExecutorMsg::Shutdown => return 80 | } 81 | } 82 | } 83 | 84 | fn get_status(&self, correlation_id: CorrelationId) { 85 | let status = ExecutorStatus { 86 | total_processes: self.processes.len(), 87 | services: self.service_senders.keys().cloned().collect() 88 | }; 89 | let envelope = Envelope { 90 | to: correlation_id.pid.clone(), 91 | from: self.pid.clone(), 92 | msg: Msg::ExecutorStatus(status), 93 | correlation_id: Some(correlation_id) 94 | }; 95 | self.route_to_service(envelope); 96 | } 97 | 98 | fn start(&mut self, pid: Pid, mut process: Box>) { 99 | let envelopes = process.init(self.pid.clone()); 100 | self.processes.insert(pid, process); 101 | for envelope in envelopes { 102 | if envelope.to == self.pid { 103 | self.handle_executor_envelope(envelope); 104 | } else { 105 | self.route(envelope); 106 | } 107 | } 108 | } 109 | 110 | fn stop(&mut self, pid: Pid) { 111 | self.processes.remove(&pid); 112 | } 113 | 114 | fn tick(&mut self) { 115 | for (pid, c_id) in self.timer_wheel.expire() { 116 | let envelope = Envelope::new(pid, self.pid.clone(), Msg::Timeout, c_id); 117 | let _ = self.route_to_process(envelope); 118 | } 119 | } 120 | 121 | /// Route envelopes to local or remote processes 122 | /// 123 | /// Retrieve any envelopes from processes handling local messages and put them on either the 124 | /// executor or the cluster channel depending upon whether they are local or remote. 125 | /// 126 | /// Note that all envelopes sent to an executor are sent from the local cluster server and must 127 | /// be addressed to local processes. 128 | fn route(&mut self, envelope: Envelope) { 129 | if self.node != envelope.to.node { 130 | self.cluster_tx.send(ClusterMsg::Envelope(envelope)).unwrap(); 131 | return; 132 | } 133 | if let Err(envelope) = self.route_to_process(envelope) { 134 | self.route_to_service(envelope); 135 | } 136 | } 137 | 138 | /// Route an envelope to a process if it exists on this node. 139 | /// 140 | /// Return Ok(()) if the process exists, Err(envelope) otherwise. 
141 | fn route_to_process(&mut self, envelope: Envelope) -> Result<(), Envelope> { 142 | if envelope.to == self.pid { 143 | self.handle_executor_envelope(envelope); 144 | return Ok(()); 145 | } 146 | 147 | if &envelope.to.name == "cluster_server" && 148 | envelope.to.group.as_ref().unwrap() == "rabble" 149 | { 150 | self.cluster_tx.send(ClusterMsg::Envelope(envelope)).unwrap(); 151 | return Ok(()); 152 | } 153 | 154 | if let Some(process) = self.processes.get_mut(&envelope.to) { 155 | let Envelope {from, msg, correlation_id, ..} = envelope; 156 | process.handle(msg, from, correlation_id, &mut self.envelopes); 157 | } else { 158 | return Err(envelope); 159 | }; 160 | 161 | // Take envelopes out of self temporarily so we don't get a borrowck error 162 | let mut envelopes = mem::replace(&mut self.envelopes, Vec::new()); 163 | for envelope in envelopes.drain(..) { 164 | if envelope.to == self.pid { 165 | self.handle_executor_envelope(envelope); 166 | continue; 167 | } 168 | if envelope.to.node == self.node { 169 | // This won't ever fail because we hold a ref to both ends of the channel 170 | self.tx.send(ExecutorMsg::Envelope(envelope)).unwrap(); 171 | } else { 172 | self.cluster_tx.send(ClusterMsg::Envelope(envelope)).unwrap(); 173 | } 174 | } 175 | // Return the allocated vec back to self 176 | let _ = mem::replace(&mut self.envelopes, envelopes); 177 | Ok(()) 178 | } 179 | 180 | /// Route an envelope to a service on this node 181 | fn route_to_service(&self, envelope: Envelope) { 182 | if let Some(tx) = self.service_senders.get(&envelope.to) { 183 | tx.send(envelope).unwrap(); 184 | } else { 185 | warn!(self.logger, "Failed to find service"; "pid" => envelope.to.to_string()); 186 | } 187 | } 188 | 189 | fn handle_executor_envelope(&mut self, envelope: Envelope) { 190 | let Envelope {from, msg, correlation_id, ..} = envelope; 191 | match msg { 192 | Msg::StartTimer(time_in_ms) => { 193 | self.timer_wheel.start((from, correlation_id), 194 | Duration::milliseconds(time_in_ms as i64)); 195 | self.metrics.timers_started += 1; 196 | }, 197 | Msg::CancelTimer(correlation_id) => { 198 | self.timer_wheel.stop((from, correlation_id)); 199 | self.metrics.timers_cancelled += 1; 200 | } 201 | Msg::GetMetrics => self.send_metrics(from, correlation_id), 202 | _ => error!(self.logger, "Invalid message sent to executor"; 203 | "from" => from.to_string(), "msg" => format!("{:?}", msg)) 204 | } 205 | } 206 | 207 | fn send_metrics(&mut self, from: Pid, correlation_id: Option) { 208 | self.metrics.processes = self.processes.len() as i64; 209 | self.metrics.services = self.service_senders.len() as i64; 210 | let envelope = Envelope { 211 | to: from, 212 | from: self.pid.clone(), 213 | msg: Msg::Metrics(self.metrics.data()), 214 | correlation_id: correlation_id 215 | }; 216 | self.route(envelope); 217 | } 218 | } 219 | 220 | -------------------------------------------------------------------------------- /src/executor/metrics.rs: -------------------------------------------------------------------------------- 1 | use metrics::{Metric, Metrics}; 2 | 3 | metrics!(ExecutorMetrics { 4 | processes: i64, 5 | services: i64, 6 | received_envelopes: u64, 7 | timers_started: u64, 8 | timers_cancelled: u64 9 | }); 10 | -------------------------------------------------------------------------------- /src/executor/mod.rs: -------------------------------------------------------------------------------- 1 | mod executor; 2 | mod status; 3 | mod msg; 4 | mod metrics; 5 | 6 | pub use self::executor::Executor; 7 | pub use 
self::status::ExecutorStatus; 8 | pub use self::msg::ExecutorMsg; 9 | pub use self::metrics::ExecutorMetrics; 10 | -------------------------------------------------------------------------------- /src/executor/msg.rs: -------------------------------------------------------------------------------- 1 | use envelope::Envelope; 2 | use process::Process; 3 | use pid::Pid; 4 | use correlation_id::CorrelationId; 5 | use amy; 6 | 7 | pub enum ExecutorMsg { 8 | Start(Pid, Box>), 9 | Stop(Pid), 10 | Envelope(Envelope), 11 | RegisterService(Pid, amy::Sender>), 12 | GetStatus(CorrelationId), 13 | Shutdown, 14 | Tick 15 | } 16 | -------------------------------------------------------------------------------- /src/executor/status.rs: -------------------------------------------------------------------------------- 1 | use pid::Pid; 2 | 3 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 4 | pub struct ExecutorStatus { 5 | pub total_processes: usize, 6 | pub services: Vec, 7 | //... Some stats 8 | } 9 | -------------------------------------------------------------------------------- /src/histogram.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Debug, Formatter}; 2 | use hdrsample; 3 | 4 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 5 | pub enum TimeUnit { 6 | Seconds, 7 | Milliseconds, 8 | Microseconds, 9 | Nanoseconds 10 | } 11 | 12 | #[derive(Clone, PartialEq, Serialize, Deserialize)] 13 | pub struct Histogram { 14 | pub unit: TimeUnit, 15 | pub histogram: hdrsample::Histogram 16 | } 17 | 18 | impl Debug for Histogram { 19 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 20 | write!(f, "Histogram ({:?})", self.unit) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![recursion_limit = "1024"] 2 | 3 | #[macro_use] 4 | extern crate error_chain; 5 | 6 | extern crate orset; 7 | extern crate rmp_serde as msgpack; 8 | extern crate protobuf; 9 | extern crate amy; 10 | extern crate time; 11 | extern crate net2; 12 | extern crate libc; 13 | extern crate ferris; 14 | //extern crate hdrsample; 15 | 16 | #[macro_use] 17 | extern crate slog; 18 | extern crate slog_stdlog; 19 | 20 | extern crate serde; 21 | 22 | #[macro_use] 23 | extern crate serde_derive; 24 | 25 | #[macro_use] 26 | mod metrics; 27 | 28 | mod node_id; 29 | mod node; 30 | mod members; 31 | mod pid; 32 | mod process; 33 | mod envelope; 34 | mod executor; 35 | mod cluster; 36 | mod msg; 37 | mod timer_wheel; 38 | mod service; 39 | mod correlation_id; 40 | pub mod serialize; 41 | 42 | pub mod errors; 43 | 44 | pub use errors::Result; 45 | pub use node_id::NodeId; 46 | pub use node::Node; 47 | pub use pid::Pid; 48 | pub use process::Process; 49 | pub use envelope::Envelope; 50 | pub use correlation_id::CorrelationId; 51 | pub use msg::Msg; 52 | pub use metrics::Metric; 53 | 54 | pub use cluster::{ 55 | ClusterServer, 56 | ClusterStatus, 57 | }; 58 | 59 | pub use executor::{ 60 | Executor, 61 | ExecutorStatus, 62 | ExecutorMetrics 63 | }; 64 | 65 | pub use service::{ 66 | Service, 67 | ConnectionHandler, 68 | ConnectionMsg, 69 | ServiceHandler, 70 | TcpServerHandler, 71 | }; 72 | 73 | use std::thread::{self, JoinHandle}; 74 | use std::sync::mpsc::channel; 75 | use std::fmt::Debug; 76 | use serde::{Deserialize, Serialize}; 77 | use amy::Poller; 78 | use slog::DrainExt; 79 | use cluster::ClusterMsg; 80 | 81 | 
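// Example (sketch only, not part of the library): how a user crate typically wires the exports
// above together with `rouse()`, which is defined below. `MyMsg` and `MyCounterProcess` are
// hypothetical user-defined types standing in for the application's message and process types:
//
//     let node_id = NodeId { name: "node1".to_string(), addr: "127.0.0.1:5000".to_string() };
//     let (node, handles) = rabble::rouse::<MyMsg>(node_id, None);
//     let pid = Pid { group: None, name: "counter".to_string(), node: node.id.clone() };
//     node.spawn(&pid, Box::new(MyCounterProcess::new())).unwrap();
//     // join another node to form a cluster
//     node.join(&"node2@127.0.0.1:5001".parse().unwrap()).unwrap();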
const TIMEOUT: usize = 5000; // ms 82 | 83 | /// Start a node in the rabble cluster and return it along with the handles to all threads started 84 | /// by rabble. 85 | /// 86 | /// All nodes in a cluster must be parameterized by the same type. 87 | pub fn rouse<'de, T>(node_id: NodeId, logger: Option) -> (Node, Vec>) 88 | where T: Serialize + Deserialize<'de> + Send + 'static + Clone + Debug, 89 | { 90 | let logger = match logger { 91 | Some(logger) => logger.new(o!("node_id" => node_id.to_string())), 92 | None => slog::Logger::root(slog_stdlog::StdLog.fuse(), o!("node_id" => node_id.to_string())) 93 | }; 94 | 95 | let mut poller = Poller::new().unwrap(); 96 | let (exec_tx, exec_rx) = channel(); 97 | let (cluster_tx, cluster_rx) = channel(); 98 | let cluster_server = ClusterServer::new(node_id.clone(), 99 | cluster_rx, 100 | exec_tx.clone(), 101 | poller.get_registrar().unwrap(), 102 | logger.clone()); 103 | let executor = Executor::new(node_id.clone(), 104 | exec_tx.clone(), 105 | exec_rx, 106 | cluster_tx.clone(), 107 | logger.clone()); 108 | 109 | let h1 = thread::Builder::new().name(format!("cluster_server::{}", node_id)).spawn(move || { 110 | cluster_server.run() 111 | }).unwrap(); 112 | 113 | let h2 = thread::Builder::new().name(format!("executor::{}", node_id)).spawn(move || { 114 | executor.run() 115 | }).unwrap(); 116 | 117 | let _cluster_tx = cluster_tx.clone(); 118 | let h3 = thread::Builder::new().name(format!("poller::{}", node_id)).spawn(move || { 119 | loop { 120 | let notifications = poller.wait(TIMEOUT).unwrap(); 121 | if let Err(_) = _cluster_tx.send(ClusterMsg::PollNotifications(notifications)) { 122 | // The process is exiting 123 | return; 124 | } 125 | } 126 | }).unwrap(); 127 | 128 | (Node::new(node_id, exec_tx, cluster_tx, logger), vec![h1, h2, h3]) 129 | } 130 | -------------------------------------------------------------------------------- /src/members.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use std::fmt::{Display, Formatter, Error}; 3 | use orset::{ORSet, Delta}; 4 | use node_id::NodeId; 5 | 6 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 7 | pub struct Members { 8 | pub me: NodeId, 9 | orset: ORSet 10 | } 11 | 12 | impl Display for Members { 13 | fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { 14 | let mut members = self.orset.elements(); 15 | members.sort(); 16 | for member in members { 17 | try!(fmt.write_fmt(format_args!("{} \n", member.name))); 18 | } 19 | Ok(()) 20 | } 21 | } 22 | 23 | impl Members { 24 | pub fn new(node: NodeId) -> Members { 25 | let mut orset = ORSet::new(node.to_string()); 26 | orset.add(node.clone()); 27 | Members { 28 | me: node, 29 | orset: orset 30 | } 31 | } 32 | 33 | pub fn all(&self) -> HashSet { 34 | self.orset.elements().into_iter().collect() 35 | } 36 | 37 | pub fn join(&mut self, other: ORSet) { 38 | self.orset.join_state(other); 39 | } 40 | 41 | /// Returns None if this node has not ever seen an add of the element 42 | pub fn leave(&mut self, leaving: NodeId) -> Option> { 43 | if let Some(dots) = self.orset.seen(&leaving) { 44 | return Some(self.orset.remove(leaving, dots)); 45 | } 46 | None 47 | } 48 | 49 | pub fn join_delta(&mut self, delta: Delta) -> bool { 50 | self.orset.join(delta) 51 | } 52 | 53 | pub fn get_orset(&self) -> ORSet { 54 | self.orset.clone() 55 | } 56 | 57 | pub fn add(&mut self, element: NodeId) -> Delta { 58 | self.orset.add(element) 59 | } 60 | } 61 | 
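// Quick illustration (editor's sketch, using only the Members API above) of how two nodes'
// membership views converge by exchanging ORSet deltas, mirroring what the cluster server does
// with ExternalMsg::Delta. The node ids are placeholders:
//
//     let node_a: NodeId = "a@127.0.0.1:5000".parse().unwrap();
//     let node_b: NodeId = "b@127.0.0.1:5001".parse().unwrap();
//     let mut members_a = Members::new(node_a.clone());
//     let mut members_b = Members::new(node_b.clone());
//
//     let delta = members_a.add(node_b.clone());   // A records B and emits a delta
//     members_b.join_delta(delta);                  // B merges the delta into its ORSet
//     // members_a.all() now contains both ids; members_b still has no entry for A until it
//     // receives A's own add as a delta (or A's full ORSet via join()).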
-------------------------------------------------------------------------------- /src/metrics.rs: -------------------------------------------------------------------------------- 1 | use serde::{Serialize, Deserialize}; 2 | use std::fmt::Debug; 3 | 4 | // A container type for status information for a given component 5 | pub trait Metrics<'de>: Serialize + Deserialize<'de> + Debug + Clone { 6 | fn data(&self) -> Vec<(String, Metric)>; 7 | } 8 | 9 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 10 | pub enum Metric { 11 | Gauge(i64), 12 | Counter(u64) 13 | } 14 | 15 | /// Generate a struct: `$struct_name` from a set of metrics 16 | /// 17 | /// Generate the impl containing the constructor, `$struct_name::new()` 18 | /// Generate `impl Metrics for $struct_name` constructing the Metric 19 | /// variants returned from `$struct_name::data` based on the type of the struct fields. 20 | macro_rules! metrics { 21 | ($struct_name:ident { 22 | $( $field:ident: $ty:ident ),+ 23 | }) => { 24 | #[derive(Debug, Clone, Serialize, Deserialize)] 25 | pub struct $struct_name { 26 | $( pub $field: $ty ),+ 27 | } 28 | 29 | impl $struct_name { 30 | pub fn new() -> $struct_name { 31 | $struct_name { 32 | $( $field: 0 ),+ 33 | } 34 | } 35 | } 36 | 37 | impl<'de> Metrics<'de> for $struct_name { 38 | fn data(&self) -> Vec<(String, Metric)> { 39 | vec![ 40 | $( (stringify!($field).into(), type_to_metric!($ty)(self.$field)) ),+ 41 | ] 42 | } 43 | } 44 | } 45 | } 46 | 47 | macro_rules! type_to_metric { 48 | (i64) => { Metric::Gauge }; 49 | (u64) => { Metric::Counter }; 50 | } 51 | -------------------------------------------------------------------------------- /src/msg.rs: -------------------------------------------------------------------------------- 1 | use cluster::ClusterStatus; 2 | use executor::ExecutorStatus; 3 | use correlation_id::CorrelationId; 4 | use metrics::Metric; 5 | 6 | type Name = String; 7 | 8 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 9 | pub enum Msg { 10 | User(T), 11 | ClusterStatus(ClusterStatus), 12 | ExecutorStatus(ExecutorStatus), 13 | StartTimer(usize), // time in ms 14 | CancelTimer(Option), 15 | Timeout, 16 | Shutdown, 17 | GetMetrics, 18 | Metrics(Vec<(Name, Metric)>) 19 | } 20 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | use std::sync::mpsc::Sender; 2 | use std::fmt::Debug; 3 | use serde::{Serialize, Deserialize}; 4 | use node_id::NodeId; 5 | use executor::ExecutorMsg; 6 | use cluster::ClusterMsg; 7 | use pid::Pid; 8 | use correlation_id::CorrelationId; 9 | use process::Process; 10 | use envelope::Envelope; 11 | use amy; 12 | use errors::*; 13 | use slog; 14 | 15 | macro_rules! send { 16 | ($s:ident.$t:ident, $msg:expr, $pid:expr, $errmsg:expr) => { 17 | if let Err(_) = $s.$t.send($msg) { 18 | return Err(ErrorKind::SendError($errmsg, $pid.cloned()).into()) 19 | } else { 20 | return Ok(()); 21 | } 22 | } 23 | } 24 | 25 | /// A Node represents a way for services to interact with rabble internals. 26 | /// 27 | /// The Node api is used by services and their handlers to send messages, get status, join 28 | /// nodes into a cluster, etc... 29 | #[derive(Clone)] 30 | pub struct Node { 31 | pub id: NodeId, 32 | pub logger: slog::Logger, 33 | executor_tx: Sender>, 34 | cluster_tx: Sender> 35 | } 36 | 37 | impl<'de, T: Serialize + Deserialize<'de> + Debug + Clone> Node { 38 | /// Create a new node. 
This function should not be called by the user directly. It is called 39 | /// by the user's call to `rabble::rouse(..)` that initializes a rabble system for a single node. 40 | pub fn new(id: NodeId, 41 | executor_tx: Sender<ExecutorMsg<T>>, 42 | cluster_tx: Sender<ClusterMsg<T>>, 43 | logger: slog::Logger) -> Node<T> { 44 | Node { 45 | id: id, 46 | executor_tx: executor_tx, 47 | cluster_tx: cluster_tx, 48 | logger: logger 49 | } 50 | } 51 | 52 | /// Join one node to another to form a cluster. 53 | /// 54 | /// Node joins are transitive such that if `Node A` joins `Node B` which is already joined with 55 | /// `Node C`, then `Node A` will become connected to both `Node B` and `Node C`. 56 | /// 57 | /// Joins are not immediate. The local member state is updated and the joining node will 58 | /// continuously try to connect to the remote node so that they can exchange membership 59 | /// information and participate in peer operations. 60 | pub fn join(&self, node_id: &NodeId) -> Result<()> { 61 | send!(self.cluster_tx, 62 | ClusterMsg::Join(node_id.clone()), 63 | None, 64 | format!("ClusterMsg::Join({:?})", *node_id)) 65 | } 66 | 67 | pub fn leave(&self, node_id: &NodeId) -> Result<()> { 68 | send!(self.cluster_tx, 69 | ClusterMsg::Leave(node_id.clone()), 70 | None, 71 | format!("ClusterMsg::Leave({:?})", *node_id)) 72 | } 73 | 74 | /// Add a process to the executor that can be sent Envelopes addressed to its pid 75 | pub fn spawn(&self, pid: &Pid, process: Box<Process<T>>) -> Result<()> { 76 | send!(self.executor_tx, 77 | ExecutorMsg::Start(pid.clone(), process), 78 | Some(pid), 79 | format!("ExecutorMsg::Start({}, ..)", pid)) 80 | } 81 | 82 | /// Remove a process from the executor 83 | pub fn stop(&self, pid: &Pid) -> Result<()> { 84 | send!(self.executor_tx, 85 | ExecutorMsg::Stop(pid.clone()), 86 | Some(pid), 87 | format!("ExecutorMsg::Stop({}, ..)", pid)) 88 | } 89 | 90 | /// Register a Service's sender with the executor so that it can be sent messages addressed to 91 | /// its pid 92 | pub fn register_service(&self, pid: &Pid, tx: &amy::Sender<Envelope<T>>) -> Result<()> 93 | { 94 | send!(self.executor_tx, 95 | ExecutorMsg::RegisterService(pid.clone(), tx.try_clone()?), 96 | Some(pid), 97 | format!("ExecutorMsg::RegisterService({}, ..)", pid)) 98 | } 99 | 100 | /// Send an envelope to the executor so it gets routed to the appropriate process or service 101 | pub fn send(&self, envelope: Envelope<T>) -> Result<()> { 102 | let to = envelope.to.clone(); 103 | send!(self.executor_tx, 104 | ExecutorMsg::Envelope(envelope), 105 | Some(&to), 106 | "ExecutorMsg::Envelope(envelope)".to_string()) 107 | } 108 | 109 | /// Get the status of the executor 110 | pub fn executor_status(&self, correlation_id: CorrelationId) -> Result<()> { 111 | let to = correlation_id.pid.clone(); 112 | send!(self.executor_tx, 113 | ExecutorMsg::GetStatus(correlation_id), 114 | Some(&to), 115 | "ExecutorMsg::GetStatus".to_string()) 116 | } 117 | 118 | /// Get the status of the cluster server 119 | pub fn cluster_status(&self, correlation_id: CorrelationId) -> Result<()> { 120 | let to = correlation_id.pid.clone(); 121 | send!(self.cluster_tx, 122 | ClusterMsg::GetStatus(correlation_id), 123 | Some(&to), 124 | "ClusterMsg::GetStatus".to_string()) 125 | } 126 | 127 | /// Shutdown the node 128 | pub fn shutdown(&self) { 129 | self.executor_tx.send(ExecutorMsg::Shutdown).unwrap(); 130 | self.cluster_tx.send(ClusterMsg::Shutdown).unwrap(); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/node_id.rs: 
-------------------------------------------------------------------------------- 1 | use std::fmt::{Display, Error, Formatter}; 2 | use std::str::FromStr; 3 | 4 | #[derive(Debug, Clone, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] 5 | pub struct NodeId { 6 | pub name: String, 7 | pub addr: String 8 | } 9 | 10 | impl Display for NodeId { 11 | fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> { 12 | try!(fmt.write_fmt(format_args!("{}@{}", self.name, self.addr))); 13 | Ok(()) 14 | } 15 | } 16 | 17 | impl FromStr for NodeId { 18 | type Err = String; 19 | 20 | fn from_str(s: &str) -> Result { 21 | let v: Vec<&str> = s.split("@").collect(); 22 | if v.len() != 2 { 23 | return Err("Invalid NodeId format - Must be of form 'name@addr'".to_string()) 24 | } 25 | Ok(NodeId { 26 | name: v[0].to_string(), 27 | addr: v[1].to_string() 28 | }) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/pid.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Display, Error, Formatter}; 2 | use std::str::FromStr; 3 | use node_id::NodeId; 4 | 5 | /// A globally unique process id 6 | /// 7 | /// Pids can be grouped together for various reasons. This grouping acts like a namespace. If 8 | /// a Process is not a member of a group, the `group` member of the Pid will be `None`. 9 | #[derive(Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)] 10 | pub struct Pid { 11 | pub group: Option, 12 | pub name: String, 13 | pub node: NodeId, 14 | } 15 | 16 | /// Explicitly format Pid in the display format since it is huge when pretty printing and they are 17 | /// used all over the place. 18 | impl Debug for Pid { 19 | fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { 20 | Display::fmt(&self, f) 21 | } 22 | } 23 | 24 | impl Display for Pid { 25 | fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { 26 | match self.group { 27 | None => write!(f, "{}::{}", self.name, self.node), 28 | Some(ref g) => write!(f, "{}::{}::{}", g, self.name, self.node) 29 | } 30 | } 31 | } 32 | 33 | impl FromStr for Pid { 34 | type Err = String; 35 | 36 | fn from_str(s: &str) -> Result { 37 | let v: Vec<&str> = s.split("::").collect(); 38 | match v.len() { 39 | 2 => Ok(Pid { 40 | group: None, 41 | name: v[0].to_string(), 42 | node: try!(NodeId::from_str(v[1])) 43 | }), 44 | 3 => Ok(Pid { 45 | group: Some(v[0].to_string()), 46 | name: v[1].to_string(), 47 | node: try!(NodeId::from_str(v[2])) 48 | }), 49 | _ => return Err( 50 | "Invalid Pid format - Must be of form 'name::node' or \ 51 | 'group::name::node'".to_string() 52 | ) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/process.rs: -------------------------------------------------------------------------------- 1 | use pid::Pid; 2 | use msg::Msg; 3 | use envelope::Envelope; 4 | use correlation_id::CorrelationId; 5 | 6 | pub trait Process : Send { 7 | /// Initialize process state if necessary 8 | fn init(&mut self, _executor_pid: Pid) -> Vec> { 9 | Vec::new() 10 | } 11 | 12 | /// Handle messages from other actors 13 | fn handle(&mut self, 14 | msg: Msg, 15 | from: Pid, 16 | correlation_id: Option, 17 | output: &mut Vec>); 18 | } 19 | -------------------------------------------------------------------------------- /src/serialize/mod.rs: -------------------------------------------------------------------------------- 1 | mod serialize; 2 | mod msgpack; 3 | mod protobuf; 4 | 5 | pub 
use self::serialize::Serialize; 6 | pub use self::msgpack::MsgpackSerializer; 7 | pub use self::protobuf::ProtobufSerializer; 8 | -------------------------------------------------------------------------------- /src/serialize/msgpack.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Write}; 2 | use std::marker::PhantomData; 3 | use std::fmt::Debug; 4 | use amy::{FrameReader, FrameWriter}; 5 | use msgpack::{Serializer, Deserializer}; 6 | use serde::{Serialize, Deserialize}; 7 | use errors::*; 8 | use serialize; 9 | 10 | const MAX_FRAME_SIZE: u32 = 64*1024*1024; // 64 MB 11 | 12 | pub struct MsgpackSerializer { 13 | frame_reader: FrameReader, 14 | frame_writer: FrameWriter, 15 | phantom: PhantomData 16 | } 17 | 18 | impl<'de, T: Serialize + Deserialize<'de> + Debug + Clone> serialize::Serialize for MsgpackSerializer { 19 | type Msg = T; 20 | 21 | fn new() -> MsgpackSerializer { 22 | MsgpackSerializer { 23 | frame_reader: FrameReader::new(MAX_FRAME_SIZE), 24 | frame_writer: FrameWriter::new(), 25 | phantom: PhantomData 26 | } 27 | } 28 | 29 | fn read_msg(&mut self, reader: &mut U) -> Result> { 30 | try!(self.frame_reader.read(reader).chain_err(|| "Msgpack Serializer failed to read from socket")); 31 | self.frame_reader.iter_mut().next().map_or(Ok(None), |frame| { 32 | 33 | let mut deserializer = Deserializer::new(&frame[..]); 34 | let msg = try!(Deserialize::deserialize(&mut deserializer) 35 | .chain_err(|| "Failed to decode msgpack frame")); 36 | Ok(Some(msg)) 37 | }) 38 | } 39 | 40 | fn write_msgs(&mut self, writer: &mut U, msg: Option<&T>) -> Result { 41 | if msg.is_none() { 42 | return self.frame_writer.write(writer, None) 43 | .chain_err(|| "Failed to write encoded message") 44 | } 45 | 46 | let mut encoded = Vec::new(); 47 | try!(msg.as_ref().unwrap().serialize(&mut Serializer::new(&mut encoded)) 48 | .chain_err(|| format!("Failed to encode message {:?}", msg))); 49 | self.frame_writer.write(writer, Some(encoded)) 50 | .chain_err(|| "Failed to write encoded message") 51 | } 52 | 53 | fn set_writable(&mut self) { 54 | self.frame_writer.writable(); 55 | } 56 | 57 | fn is_writable(&self) -> bool { 58 | self.frame_writer.is_writable() 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/serialize/protobuf.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Write}; 2 | use std::marker::PhantomData; 3 | use amy::{FrameReader, FrameWriter}; 4 | use protobuf::{Message, MessageStatic, parse_from_bytes}; 5 | use errors::*; 6 | use serialize::Serialize; 7 | 8 | const MAX_FRAME_SIZE: u32 = 64*1024*1024; // 64 MB 9 | 10 | pub struct ProtobufSerializer { 11 | frame_reader: FrameReader, 12 | frame_writer: FrameWriter, 13 | phantom: PhantomData 14 | } 15 | 16 | impl Serialize for ProtobufSerializer { 17 | type Msg = M; 18 | 19 | fn new() -> ProtobufSerializer { 20 | ProtobufSerializer { 21 | frame_reader: FrameReader::new(MAX_FRAME_SIZE), 22 | frame_writer: FrameWriter::new(), 23 | phantom: PhantomData 24 | } 25 | } 26 | 27 | fn read_msg(&mut self, reader: &mut U) -> Result> { 28 | try!(self.frame_reader.read(reader).chain_err(|| "Serializer failed to read from socket")); 29 | self.frame_reader.iter_mut().next().map_or(Ok(None), |frame| { 30 | let msg: M = try!(parse_from_bytes(&frame[..])); 31 | Ok(Some(msg)) 32 | }) 33 | } 34 | 35 | fn write_msgs(&mut self, writer: &mut U, msg: Option<&M>) -> Result { 36 | if msg.is_none() { 37 | 
return self.frame_writer.write(writer, None) 38 | .chain_err(|| "Failed to write encoded message") 39 | } 40 | let encoded = try!(msg.as_ref().unwrap().write_to_bytes()); 41 | self.frame_writer.write(writer, Some(encoded)) 42 | .chain_err(|| "Failed to write encoded message") 43 | } 44 | 45 | fn set_writable(&mut self) { 46 | self.frame_writer.writable(); 47 | } 48 | 49 | fn is_writable(&self) -> bool { 50 | self.frame_writer.is_writable() 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/serialize/serialize.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Write}; 2 | use std::fmt::Debug; 3 | use errors::*; 4 | 5 | /// This trait provides for reading framed messages from a `Read` type, decoding them and 6 | /// returning them. It buffers incomplete messages. Reading of only a single message of a time is to 7 | /// allow for strategies that prevent starvation of other readers. 8 | /// 9 | /// This trait provides for serializing and framing messages, and then writing them to a `Write` 10 | /// type. When a complete message cannot be sent it is buffered for when the `Write` type is next 11 | /// writable. 12 | /// 13 | /// We write all possible data to the writer until it blocks or there is no more data to be written. 14 | /// Since all output is in response to input, we don't worry about starvation of writers. In order 15 | /// to minimize memory consumption we just write as much as possible and worry about starvation 16 | /// management on the reader side. 17 | pub trait Serialize { 18 | type Msg: Clone + Debug; 19 | 20 | fn new() -> Self; 21 | 22 | /// Read and decode a single message at a time. 23 | /// 24 | /// This function should be called until it returns Ok(None) in which case there is no more data 25 | /// left to return. For async sockets this signals that the socket should be re-registered. 26 | fn read_msg(&mut self, reader: &mut T) -> Result>; 27 | 28 | /// Write out as much pending data as possible. Append `msg` to the pending data if not `None`. 29 | /// If this function returns `Ok(false)` the writer is no longer writable (EAGAIN/EWOULDBLOCK) 30 | fn write_msgs(&mut self, writer: &mut T, msg: Option<&Self::Msg>) -> Result; 31 | 32 | /// As an optimization to prevent unnecessary write system calls, the serializer should keep 33 | /// track of whether the writer is writable or not. The serializer will automatically be set to 34 | /// unwritable if `write_msgs` returns `Ok(false)`. When the poller fires and lets us know that 35 | /// the writer is writable again, we should call this function to inform the serializer so that 36 | /// it will attempt to write to the writer and not just buffer the request. 37 | fn set_writable(&mut self); 38 | 39 | /// Tell us whether or not the serializer believes the associated writer is writable or not. 
40 | fn is_writable(&self) -> bool; 41 | } 42 | -------------------------------------------------------------------------------- /src/service/connection_handler.rs: -------------------------------------------------------------------------------- 1 | use envelope::Envelope; 2 | use correlation_id::CorrelationId; 3 | use pid::Pid; 4 | 5 | /// Implement this for a specific connection handler 6 | pub trait ConnectionHandler: Sized { 7 | type Msg; 8 | type ClientMsg; 9 | 10 | fn new(pid: Pid, id: u64) -> Self; 11 | fn handle_envelope(&mut self, Envelope, &mut Vec>); 12 | fn handle_network_msg(&mut self, Self::ClientMsg, &mut Vec>); 13 | } 14 | 15 | /// Connection messages are returned from the callback functions for a Connection. 16 | /// 17 | /// These messages can be either an envelope as gets used in the rest of the system or a message 18 | /// specific to this service that can be serialized and sent to a client on the other end of the 19 | /// connection. 20 | pub enum ConnectionMsg 21 | { 22 | Envelope(Envelope), 23 | Client(C::ClientMsg, CorrelationId) 24 | } 25 | -------------------------------------------------------------------------------- /src/service/mod.rs: -------------------------------------------------------------------------------- 1 | mod service; 2 | mod connection_handler; 3 | mod service_handler; 4 | mod tcp_server_handler; 5 | 6 | 7 | pub use self::service::Service; 8 | pub use self::connection_handler::{ 9 | ConnectionHandler, 10 | ConnectionMsg 11 | }; 12 | pub use self::service_handler::ServiceHandler; 13 | pub use self::tcp_server_handler::TcpServerHandler; 14 | -------------------------------------------------------------------------------- /src/service/service.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use amy::{self, Poller, Registrar}; 3 | use pid::Pid; 4 | use serde::{Serialize, Deserialize}; 5 | use msg::Msg; 6 | use envelope::Envelope; 7 | use node::Node; 8 | use errors::*; 9 | use slog; 10 | use super::ServiceHandler; 11 | 12 | /// A system service that operates on a single thread. A service is registered via its pid 13 | /// with the executor and can send and receive messages to processes as well as other services. 14 | pub struct Service { 15 | pub pid: Pid, 16 | pub tx: amy::Sender>, 17 | rx: amy::Receiver>, 18 | node: Node, 19 | poller: Poller, 20 | registrar: Registrar, 21 | handler: H, 22 | logger: slog::Logger 23 | } 24 | 25 | impl<'de, T, H> Service 26 | where T: Serialize + Deserialize<'de> + Debug + Clone, 27 | H: ServiceHandler 28 | { 29 | pub fn new(pid: Pid, node: Node, mut handler: H) 30 | -> Result> 31 | { 32 | let poller = Poller::new().unwrap(); 33 | let mut registrar = poller.get_registrar()?; 34 | let (tx, rx) = registrar.channel()?; 35 | node.register_service(&pid, &tx)?; 36 | handler.init(®istrar, &node)?; 37 | let logger = node.logger.new(o!("component" => "service", "pid" => pid.to_string())); 38 | Ok(Service { 39 | pid: pid, 40 | tx: tx, 41 | rx: rx, 42 | node: node, 43 | poller: poller, 44 | registrar: registrar, 45 | handler: handler, 46 | logger: logger 47 | }) 48 | } 49 | 50 | pub fn wait(&mut self) { 51 | loop { 52 | // TODO: Configurable timeout? 
53 |             for notification in self.poller.wait(1000).unwrap() {
54 |                 if notification.id == self.rx.get_id() {
55 |                     if let Err(e) = self.handle_envelopes() {
56 |                         if let ErrorKind::Shutdown(_) = *e.kind() {
57 |                             info!(self.logger, "Service shutting down";
58 |                                   "pid" => self.pid.to_string());
59 |                             return;
60 |                         }
61 |                         error!(self.logger,
62 |                                "Failed to handle envelope";
63 |                                "error" => e.to_string())
64 |                     }
65 |                 } else {
66 |                     if let Err(e) = self.handler.handle_notification(&self.node,
67 |                                                                      notification,
68 |                                                                      &self.registrar) {
69 |                         warn!(self.logger,
70 |                               "Failed to handle poll notification";
71 |                               "error" => e.to_string())
72 |                     }
73 |                 }
74 |             }
75 |         }
76 |     }
77 | 
78 |     pub fn handle_envelopes(&mut self) -> Result<()> {
79 |         while let Ok(envelope) = self.rx.try_recv() {
80 |             if let Msg::Shutdown = envelope.msg {
81 |                 return Err(ErrorKind::Shutdown(self.pid.clone()).into());
82 |             }
83 |             try!(self.handler.handle_envelope(&self.node, envelope, &self.registrar));
84 |         }
85 |         Ok(())
86 |     }
87 | }
88 | 
--------------------------------------------------------------------------------
/src/service/service_handler.rs:
--------------------------------------------------------------------------------
 1 | use amy::{Notification, Registrar};
 2 | use envelope::Envelope;
 3 | use node::Node;
 4 | use errors::*;
 5 | 
 6 | /// A handler that implements the logic run by a `Service`
 7 | pub trait ServiceHandler<T> {
 8 |     /// A callback function used to initialize the handler.
 9 |     ///
10 |     /// The handler is expected to register any necessary timeouts or listening sockets with the
11 |     /// poller and send any initialization messages via the Node. Some handlers may not need any
12 |     /// initialization, so this callback is optional.
13 |     fn init(&mut self, &Registrar, &Node<T>) -> Result<()> {
14 |         Ok(())
15 |     }
16 | 
17 |     /// Handle poll notifications.
18 |     ///
19 |     /// Some handlers don't register anything that requires notification and only receive
20 |     /// envelopes. Those handlers do not need to implement this function.
21 |     fn handle_notification(&mut self, &Node<T>, Notification, &Registrar) -> Result<()> {
22 |         // TODO: Log message
23 |         Ok(())
24 |     }
25 | 
26 |     /// Handle any envelopes addressed to the service's Pid. All handlers must implement
27 |     /// this function.
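    ///
    /// A minimal illustrative implementation (a hedged sketch; the `EchoHandler` type, the `u64`
    /// message parameter, and the assumption that `Node::send` returns this crate's `Result` are
    /// not part of the library):
    ///
    /// ```ignore
    /// struct EchoHandler;
    ///
    /// impl ServiceHandler<u64> for EchoHandler {
    ///     fn handle_envelope(&mut self,
    ///                        node: &Node<u64>,
    ///                        envelope: Envelope<u64>,
    ///                        _registrar: &Registrar) -> Result<()> {
    ///         // Bounce the message straight back to whoever sent it
    ///         let Envelope {to, from, msg, correlation_id} = envelope;
    ///         node.send(Envelope::new(from, to, msg, correlation_id))
    ///     }
    /// }
    /// ```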
28 | fn handle_envelope(&mut self, &Node, Envelope, &Registrar) -> Result<()>; 29 | } 30 | -------------------------------------------------------------------------------- /src/service/tcp_server_handler.rs: -------------------------------------------------------------------------------- 1 | use std::net::{TcpListener, TcpStream}; 2 | use std::collections::HashMap; 3 | use std::io; 4 | use std::fmt::Debug; 5 | use serde; 6 | use amy::{Registrar, Notification, Event}; 7 | use errors::*; 8 | use msg::Msg; 9 | use envelope::Envelope; 10 | use node::Node; 11 | use timer_wheel::TimerWheel; 12 | use pid::Pid; 13 | use correlation_id::CorrelationId; 14 | use serialize::Serialize; 15 | use super::{ServiceHandler, ConnectionHandler, ConnectionMsg}; 16 | 17 | // The timer wheel expirations are accurate to within 1/TIMER_WHEEL_SLOTS of the timeout 18 | const TIMER_WHEEL_SLOTS: usize = 10; 19 | 20 | struct Connection 21 | where C: ConnectionHandler, 22 | S: Serialize 23 | { 24 | id: usize, 25 | handler: C, 26 | serializer: S, 27 | sock: TcpStream, 28 | timer_wheel_slot: usize 29 | } 30 | 31 | impl Connection 32 | where C: ConnectionHandler, 33 | S: Serialize 34 | { 35 | pub fn new(id: usize, 36 | handler: C, 37 | sock: TcpStream, 38 | slot: usize) -> Connection 39 | { 40 | Connection { 41 | id: id, 42 | handler: handler, 43 | serializer: S::new(), 44 | sock: sock, 45 | timer_wheel_slot: slot 46 | } 47 | } 48 | } 49 | 50 | /// A service handler for an async TCP server 51 | pub struct TcpServerHandler 52 | where C: ConnectionHandler, 53 | S: Serialize 54 | { 55 | pid: Pid, 56 | listener: TcpListener, 57 | listener_id: usize, 58 | connections: HashMap>, 59 | connection_timeout: Option, // ms 60 | connection_timer_id: Option, 61 | connection_timer_wheel: Option>, 62 | request_timeout: usize, // ms 63 | request_timer_id: usize, 64 | request_timer_wheel: TimerWheel, 65 | output: Vec> 66 | 67 | } 68 | 69 | impl<'de, C, S> TcpServerHandler 70 | where C: ConnectionHandler, 71 | S: Serialize, 72 | C::Msg: serde::Serialize + serde::Deserialize<'de> + Clone + Debug 73 | 74 | { 75 | /// Create a new TcpServerHandler 76 | /// 77 | /// Bind to `addr` and close a connection that hasn't received a message in `connection_timeout` 78 | /// ms. Note that the connection timeout is optional. 79 | /// 80 | /// Every request with a CorrelationId is also tracked with a timer. This `request_timeout` is 81 | /// not optional as every request can potentially fail, or be delayed indefinitely. 82 | pub fn new(pid: Pid, 83 | addr: &str, 84 | request_timeout: usize, 85 | connection_timeout: Option) -> TcpServerHandler 86 | { 87 | let mut connection_timer_wheel = None; 88 | if connection_timeout.is_some() { 89 | connection_timer_wheel = Some(TimerWheel::new(TIMER_WHEEL_SLOTS + 1)); 90 | } 91 | let listener = TcpListener::bind(addr).unwrap(); 92 | listener.set_nonblocking(true).unwrap(); 93 | TcpServerHandler { 94 | pid: pid, 95 | listener: listener, 96 | listener_id: 0, 97 | connections: HashMap::new(), 98 | connection_timeout: connection_timeout, 99 | connection_timer_id: None, 100 | connection_timer_wheel: connection_timer_wheel, 101 | request_timeout: request_timeout, 102 | request_timer_id: 0, // Dummy timer id for now. 
Will be set in init() 103 | request_timer_wheel: TimerWheel::new(TIMER_WHEEL_SLOTS + 1), 104 | output: Vec::new() 105 | } 106 | } 107 | 108 | fn accept_connections(&mut self, registrar: &Registrar) -> Result<()> { 109 | loop { 110 | match self.listener.accept() { 111 | Ok((socket, _)) => { 112 | try!(self.new_connection(socket, registrar)); 113 | }, 114 | Err(e) => { 115 | if e.kind() == io::ErrorKind::WouldBlock { 116 | return Ok(()) 117 | } 118 | return Err(e.into()) 119 | } 120 | } 121 | } 122 | } 123 | 124 | /// Setup a new Connection object 125 | /// 126 | /// Make the socket nonblocking, register it for reads, and establish the connection timeout. 127 | fn new_connection(&mut self, sock: TcpStream, registrar: &Registrar) -> Result<()> { 128 | try!(sock.set_nonblocking(true).chain_err(|| "Failed to make socket nonblocking")); 129 | let id = try!(registrar.register(&sock, Event::Read) 130 | .chain_err(|| "Failed to register new socket for reading")); 131 | let handler = C::new(self.pid.clone(), id as u64); 132 | let slot = self.connection_timer_wheel.as_mut().map_or(0, |mut tw| tw.insert(id)); 133 | let connection = Connection::new(id, handler, sock, slot); 134 | self.connections.insert(id, connection); 135 | Ok(()) 136 | } 137 | 138 | fn handle_connection_notification(&mut self, 139 | notification: &Notification, 140 | node: &Node) -> Result<()> 141 | { 142 | if let Some(connection) = self.connections.get_mut(¬ification.id) { 143 | if notification.event.writable() { 144 | // Notify the serializer that the socket is writable again 145 | connection.serializer.set_writable(); 146 | try!(connection.serializer.write_msgs(&mut connection.sock, None)); 147 | } 148 | 149 | if notification.event.readable() { 150 | try!(handle_readable(connection, 151 | &mut self.request_timer_wheel, 152 | node, 153 | &mut self.output)); 154 | update_connection_timeout(connection, &mut self.connection_timer_wheel); 155 | } 156 | } 157 | Ok(()) 158 | } 159 | 160 | fn connection_tick(&mut self, registrar: &Registrar) { 161 | for id in self.connection_timer_wheel.as_mut().unwrap().expire() { 162 | if let Some(connection) = self.connections.remove(&id) { 163 | let _ = registrar.deregister(connection.sock); 164 | // TODO: Log connection timeout 165 | } 166 | } 167 | } 168 | 169 | /// Handle request timer events and see if any requests have timed out. 
170 | fn request_tick(&mut self, node: &Node) -> Result<()>{ 171 | for correlation_id in self.request_timer_wheel.expire() { 172 | let conn_id = correlation_id.connection.as_ref().unwrap(); 173 | if let Some(mut connection) = self.connections.get_mut(&(*conn_id as usize)) { 174 | let envelope = Envelope { 175 | from: self.pid.clone(), 176 | to: self.pid.clone(), 177 | msg: Msg::Timeout, 178 | correlation_id: Some(correlation_id.clone()) 179 | }; 180 | connection.handler.handle_envelope(envelope, &mut self.output); 181 | try!(handle_connection_msgs(&mut self.request_timer_wheel, 182 | &mut self.output, 183 | &mut connection.serializer, 184 | &mut connection.sock, 185 | node)); 186 | } 187 | } 188 | Ok(()) 189 | } 190 | } 191 | 192 | 193 | impl<'de, C, S> ServiceHandler for TcpServerHandler 194 | where C: ConnectionHandler, 195 | S: Serialize, 196 | C::Msg: serde::Serialize + serde::Deserialize<'de> + Clone + Debug 197 | { 198 | /// Initialize the state of the handler: Register timers and tcp listen socket 199 | fn init(&mut self, 200 | registrar: &Registrar, 201 | _node: &Node) -> Result<()> 202 | { 203 | self.listener_id = try!(registrar.register(&self.listener, Event::Read) 204 | .chain_err(|| "Failed to register listener")); 205 | 206 | let req_timeout = self.request_timeout / TIMER_WHEEL_SLOTS; 207 | self.request_timer_id = try!(registrar.set_interval(req_timeout) 208 | .chain_err(|| "Failed to register request timer")); 209 | 210 | if self.connection_timeout.is_some() { 211 | let timeout = self.connection_timeout.unwrap() / TIMER_WHEEL_SLOTS; 212 | self.connection_timer_id = Some(try!(registrar.set_interval(timeout) 213 | .chain_err(|| "Failed to register connection timer"))); 214 | } 215 | Ok(()) 216 | } 217 | 218 | /// Handle any poll notifications 219 | fn handle_notification(&mut self, 220 | node: &Node, 221 | notification: Notification, 222 | registrar: &Registrar) -> Result<()> 223 | { 224 | if notification.id == self.listener_id { 225 | return self.accept_connections(registrar); 226 | } 227 | 228 | if notification.id == self.request_timer_id { 229 | return self.request_tick(&node); 230 | } 231 | 232 | if self.connection_timer_id.is_some() 233 | && notification.id == self.connection_timer_id.unwrap() 234 | { 235 | self.connection_tick(®istrar); 236 | return Ok(()); 237 | } 238 | 239 | if let Err(e) = self.handle_connection_notification(¬ification, &node) { 240 | // Unwrap is correct here since the above call only fails if the connection exists 241 | let connection = self.connections.remove(¬ification.id).unwrap(); 242 | let _ = registrar.deregister(connection.sock); 243 | let errmsg = e.to_string(); 244 | return Err(e).chain_err(|| format!("{}: id {}", errmsg, notification.id)); 245 | } 246 | Ok (()) 247 | 248 | } 249 | 250 | /// Handle an envelope from a process or service 251 | fn handle_envelope(&mut self, 252 | node: &Node, 253 | envelope: Envelope, 254 | _registrar: &Registrar) -> Result<()> 255 | { 256 | if envelope.correlation_id.is_none() { 257 | return Err(format!("No correlation id for envelope {:?}", envelope).into()); 258 | } 259 | // Don't bother cancelling request timers... 
Just ignore the timeouts in the connection if 260 | // the request has already received its reply 261 | let conn_id = envelope.correlation_id.as_ref().unwrap().connection.as_ref().cloned().unwrap(); 262 | if let Some(mut connection) = self.connections.get_mut(&(conn_id as usize)) { 263 | connection.handler.handle_envelope(envelope, &mut self.output); 264 | try!(handle_connection_msgs(&mut self.request_timer_wheel, 265 | &mut self.output, 266 | &mut connection.serializer, 267 | &mut connection.sock, 268 | node)); 269 | 270 | } 271 | Ok(()) 272 | } 273 | } 274 | 275 | /// Handle any readable notifications. 276 | fn handle_readable<'de, C, S>(connection: &mut Connection, 277 | request_timer_wheel: &mut TimerWheel, 278 | node: &Node, 279 | output: &mut Vec>) -> Result<()> 280 | where C: ConnectionHandler, 281 | S: Serialize, 282 | C::Msg: serde::Serialize + serde::Deserialize<'de> + Clone + Debug 283 | { 284 | while let Some(msg) = try!(connection.serializer.read_msg(&mut connection.sock)) { 285 | connection.handler.handle_network_msg(msg, output); 286 | try!(handle_connection_msgs(request_timer_wheel, 287 | output, 288 | &mut connection.serializer, 289 | &mut connection.sock, 290 | node)); 291 | } 292 | Ok(()) 293 | } 294 | 295 | /// A new message has been received on a connection. Reset the timer. 296 | fn update_connection_timeout(connection: &mut Connection, 297 | timer_wheel: &mut Option>) 298 | where C: ConnectionHandler, 299 | S: Serialize 300 | { 301 | if timer_wheel.is_none() { return; } 302 | let mut timer_wheel = timer_wheel.as_mut().unwrap(); 303 | timer_wheel.remove(&connection.id, connection.timer_wheel_slot); 304 | connection.timer_wheel_slot = timer_wheel.insert(connection.id); 305 | } 306 | 307 | /// Send client replies and route envelopes 308 | /// 309 | /// For any envelopes with correlation ids, record them in the request timer wheel. 310 | fn handle_connection_msgs<'de, C, S>(request_timer_wheel: &mut TimerWheel, 311 | msgs: &mut Vec>, 312 | serializer: &mut S, 313 | sock: &mut TcpStream, 314 | node: &Node) -> Result<()> 315 | where C: ConnectionHandler, 316 | S: Serialize, 317 | C::Msg: serde::Serialize + serde::Deserialize<'de> + Clone + Debug 318 | { 319 | for m in msgs.drain(..) { 320 | match m { 321 | ConnectionMsg::Envelope(envelope) => { 322 | if envelope.correlation_id.is_some() { 323 | request_timer_wheel.insert(envelope.correlation_id.as_ref().unwrap().clone()); 324 | } 325 | node.send(envelope).unwrap(); 326 | }, 327 | ConnectionMsg::Client(client_msg, _) => { 328 | // Respond to the client 329 | try!(serializer.write_msgs(sock, Some(&client_msg)) 330 | .chain_err(|| format!("Failed to write client msg: {:?}", 331 | client_msg))); 332 | } 333 | } 334 | } 335 | Ok(()) 336 | } 337 | -------------------------------------------------------------------------------- /src/timer_wheel.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | use std::collections::HashSet; 3 | 4 | /// A very simple timer wheel optimized for large numbers of timers using 5 | /// identical timeout lengths. This type of timer wheel is useful for things like client timeouts, 6 | /// which are all identical, but not useful for storing a variety of different timeout lengths. On 7 | /// the other hand it works without tracking actual time and provides O(1) `lookup`, `remove` and 8 | /// `expire` functions. It just requires a single approximately fixed length tick to drive the 9 | /// wheel. 
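///
/// An illustrative sketch of the API described below (not code from this crate; the connection
/// id `7` is made up):
///
/// ```ignore
/// let mut wheel: TimerWheel<usize> = TimerWheel::new(10);
/// // Register a timer for connection 7, remembering the slot for O(1) removal
/// let slot = wheel.insert(7);
/// // If the connection is closed early, cancel its timer
/// wheel.remove(&7, slot);
/// // On every tick, expire the oldest slot and handle whatever is still in it
/// for id in wheel.expire() {
///     // close the timed out connection with this id
/// }
/// ```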
10 | /// 11 | /// Each timer, of say, 5 seconds, is inserted into the `current_write` slot. On each tick the slot 12 | /// pointers will advance. Timers inserted in the `current_read` spot will then all expire and be 13 | /// returned for processing. 14 | pub struct TimerWheel { 15 | size: usize, 16 | data: Vec>, 17 | current_read: usize, 18 | current_write: usize 19 | } 20 | 21 | impl TimerWheel { 22 | pub fn new(size: usize) -> TimerWheel { 23 | let mut data = Vec::with_capacity(size); 24 | for _ in 0..size { 25 | data.push(HashSet::new()); 26 | } 27 | TimerWheel { 28 | size: size, 29 | current_write: size - 1, 30 | current_read: 0, 31 | data: data 32 | } 33 | } 34 | 35 | /// Insert a new connection to the current writing wheel slot 36 | /// Return the number slot so the write can be removed in O(1) time later. 37 | pub fn insert(&mut self, key: T) -> usize { 38 | let slot = self.data.get_mut(self.current_write).unwrap(); 39 | slot.insert(key); 40 | self.current_write 41 | } 42 | 43 | pub fn remove(&mut self, key: &T, slot: usize) { 44 | let set = self.data.get_mut(slot).unwrap(); 45 | let _ = set.remove(key); 46 | } 47 | 48 | pub fn expire(&mut self) -> HashSet { 49 | self.data.push(HashSet::new()); 50 | let set = self.data.swap_remove(self.current_read); 51 | self.update_pointers(); 52 | set 53 | } 54 | 55 | fn update_pointers(&mut self) { 56 | self.current_write = (self.current_write + 1) % self.size; 57 | self.current_read = (self.current_read + 1) % self.size; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /tests/join_leave.rs: -------------------------------------------------------------------------------- 1 | //! Test cluster joining and leaving 2 | 3 | extern crate amy; 4 | extern crate rabble; 5 | 6 | #[macro_use] 7 | extern crate assert_matches; 8 | extern crate serde; 9 | 10 | #[macro_use] 11 | extern crate serde_derive; 12 | 13 | extern crate slog; 14 | extern crate slog_stdlog; 15 | extern crate slog_envlogger; 16 | extern crate slog_term; 17 | extern crate log; 18 | extern crate time; 19 | 20 | mod utils; 21 | 22 | use std::str; 23 | use amy::{Poller, Receiver}; 24 | use time::Duration; 25 | 26 | use utils::messages::*; 27 | use utils::{ 28 | wait_for, 29 | start_nodes, 30 | test_pid, 31 | register_test_as_service 32 | }; 33 | 34 | use rabble::{ 35 | Envelope, 36 | Msg, 37 | ClusterStatus, 38 | Node, 39 | CorrelationId 40 | }; 41 | 42 | const NUM_NODES: usize = 3; 43 | 44 | #[test] 45 | fn join_leave() { 46 | let (nodes, handles) = start_nodes(NUM_NODES); 47 | 48 | // We create an amy channel so that we can pretend this test is a service. 49 | // We register the sender with all nodes so that we can check the responses to admin calls 50 | // like node.get_cluster_status(). 51 | let mut poller = Poller::new().unwrap(); 52 | let (test_tx, test_rx) = poller.get_registrar().unwrap().channel().unwrap(); 53 | 54 | register_test_as_service(&mut poller, &nodes, &test_tx, &test_rx); 55 | 56 | // join node1 to node2 57 | // Wait for the cluster status of both nodes to show they are connected 58 | nodes[0].join(&nodes[1].id).unwrap(); 59 | assert!(wait_for_cluster_status(&nodes[0], &test_rx, 1)); 60 | assert!(wait_for_cluster_status(&nodes[1], &test_rx, 1)); 61 | 62 | // Join node1 to node3. This will cause a delta to be sent from node1 to node2. Node3 will also 63 | // connect to node2 and send it's members, since it will learn of node2 from node1. Either way 64 | // all nodes should stabilize as knowing about each other. 
65 | nodes[0].join(&nodes[2].id).unwrap(); 66 | for node in &nodes { 67 | assert!(wait_for_cluster_status(&node, &test_rx, 2)); 68 | } 69 | 70 | // Remove node2 from the cluster. This will cause a delta of the remove to be broadcast to node1 71 | // 1 and node3. Note that the request is sent to node1, not the node that is leaving. 72 | nodes[0].leave(&nodes[1].id).unwrap(); 73 | assert!(wait_for_cluster_status(&nodes[0], &test_rx, 1)); 74 | assert!(wait_for_cluster_status(&nodes[2], &test_rx, 1)); 75 | 76 | 77 | // Remove node1 from the cluster. This request goest to node1. It's possible in production that 78 | // the broadcast doesn't make it to node3 before node1 disconnects from node3 due to the 79 | // membership check on the next tick that removes connections. 80 | // TODO: make that work 81 | nodes[0].leave(&nodes[0].id).unwrap(); 82 | assert!(wait_for_cluster_status(&nodes[0], &test_rx, 0)); 83 | assert!(wait_for_cluster_status(&nodes[2], &test_rx, 0)); 84 | 85 | for node in nodes { 86 | node.shutdown(); 87 | } 88 | 89 | for h in handles { 90 | h.join().unwrap(); 91 | } 92 | } 93 | 94 | fn wait_for_cluster_status(node: &Node, 95 | test_rx: &Receiver>, 96 | num_connected: usize) -> bool 97 | { 98 | let timeout = Duration::seconds(5); 99 | let test_pid = test_pid(node.id.clone()); 100 | wait_for(timeout, || { 101 | let correlation_id = CorrelationId::pid(test_pid.clone()); 102 | node.cluster_status(correlation_id.clone()).unwrap(); 103 | if let Ok(envelope) = test_rx.try_recv() { 104 | if let Msg::ClusterStatus(ClusterStatus{established, num_connections, ..}) 105 | = envelope.msg 106 | { 107 | if established.len() == num_connected && num_connections == num_connected { 108 | return true; 109 | } 110 | } 111 | } 112 | false 113 | }) 114 | } 115 | 116 | -------------------------------------------------------------------------------- /tests/multi_node_chain_replication.rs: -------------------------------------------------------------------------------- 1 | extern crate amy; 2 | extern crate rabble; 3 | 4 | #[macro_use] 5 | extern crate assert_matches; 6 | extern crate serde; 7 | 8 | #[macro_use] 9 | extern crate serde_derive; 10 | 11 | extern crate slog; 12 | extern crate slog_stdlog; 13 | extern crate slog_envlogger; 14 | extern crate slog_term; 15 | extern crate log; 16 | extern crate time; 17 | 18 | mod utils; 19 | 20 | use std::{str, thread}; 21 | use std::net::TcpStream; 22 | use amy::{Poller, Receiver, Sender}; 23 | use time::{SteadyTime, Duration}; 24 | 25 | use utils::messages::*; 26 | use utils::replica::Replica; 27 | use utils::api_server; 28 | use utils::{ 29 | wait_for, 30 | start_nodes, 31 | send, 32 | test_pid, 33 | register_test_as_service 34 | }; 35 | 36 | use rabble::{ 37 | Pid, 38 | Envelope, 39 | Msg, 40 | ClusterStatus, 41 | Node, 42 | CorrelationId 43 | }; 44 | use rabble::serialize::{Serialize, MsgpackSerializer}; 45 | 46 | const API_SERVER_IP: &'static str = "127.0.0.1:22001"; 47 | const NUM_NODES: usize = 3; 48 | 49 | type CrNode = Node; 50 | type CrReceiver = Receiver>; 51 | type CrSender = Sender>; 52 | 53 | #[test] 54 | fn chain_replication() { 55 | let (nodes, mut handles) = start_nodes(NUM_NODES); 56 | 57 | // We create an amy channel so that we can pretend this test is a service. 58 | // We register the sender with all nodes so that we can check the responses to admin calls 59 | // like node.get_cluster_status(). 
60 | let mut poller = Poller::new().unwrap(); 61 | let (test_tx, test_rx) = poller.get_registrar().unwrap().channel().unwrap(); 62 | 63 | register_test_as_service(&mut poller, &nodes, &test_tx, &test_rx); 64 | 65 | let pids = create_replica_pids(&nodes); 66 | 67 | // We only send API requests to node1, so only bother starting an API server on this node 68 | let (service_pid, service_tx, service_handle) = api_server::start(nodes[0].clone()); 69 | handles.push(service_handle); 70 | 71 | spawn_replicas(&nodes, &pids); 72 | 73 | join_nodes(&nodes, &mut poller, &test_rx); 74 | 75 | run_client_operations(&pids[0]); 76 | 77 | verify_histories(&pids); 78 | 79 | shutdown(nodes, service_pid, service_tx); 80 | 81 | for h in handles { 82 | h.join().unwrap(); 83 | } 84 | } 85 | 86 | 87 | fn spawn_replicas(nodes: &Vec, pids: &Vec) { 88 | for i in 0..pids.len() { 89 | let next = if i == pids.len() - 1 { 90 | None 91 | } else { 92 | Some(pids[i + 1].clone()) 93 | }; 94 | let replica = Box::new(Replica::new(pids[i].clone(), next)); 95 | nodes[i].spawn(&pids[i], replica).unwrap(); 96 | } 97 | } 98 | 99 | fn join_nodes(nodes: &Vec, poller: &mut Poller, test_rx: &CrReceiver) { 100 | nodes[0].join(&nodes[1].id).unwrap(); 101 | nodes[0].join(&nodes[2].id).unwrap(); 102 | wait_for_connected_cluster(&nodes, poller, test_rx); 103 | } 104 | 105 | /// launch 3 clients and send concurrent operations to the head of the chain 106 | fn run_client_operations(pid: &Pid) { 107 | let mut client_handles = Vec::new(); 108 | for i in 0..3 { 109 | let pid = pid.clone(); 110 | let h = thread::spawn(move || { 111 | let mut sock = TcpStream::connect(API_SERVER_IP).unwrap(); 112 | let mut serializer = MsgpackSerializer::new(); 113 | sock.set_nonblocking(true).unwrap(); 114 | send(&mut sock, &mut serializer, ApiClientMsg::Op(pid, i)); 115 | assert_eq!(true, wait_for(Duration::seconds(5), || { 116 | if let Ok(Some(ApiClientMsg::OpComplete)) = serializer.read_msg(&mut sock) { 117 | return true; 118 | } 119 | false 120 | })); 121 | }); 122 | client_handles.push(h); 123 | } 124 | 125 | for h in client_handles { 126 | h.join().unwrap(); 127 | } 128 | } 129 | 130 | /// Verify that after all client operations have gotten replies that the history of operations in 131 | /// each replica is identical. 
132 | fn verify_histories(pids: &Vec) { 133 | let pids = pids.clone(); 134 | let h = thread::spawn(move || { 135 | let mut sock = TcpStream::connect(API_SERVER_IP).unwrap(); 136 | sock.set_nonblocking(true).unwrap(); 137 | let mut serializer = MsgpackSerializer::new(); 138 | let mut history = Vec::new(); 139 | for pid in pids { 140 | assert_matches!(serializer.write_msgs(&mut sock, 141 | Some(&ApiClientMsg::GetHistory(pid))), 142 | Ok(true)); 143 | loop { 144 | thread::sleep(std::time::Duration::from_millis(10)); 145 | match serializer.read_msg(&mut sock) { 146 | Ok(None) => (), 147 | Ok(Some(ApiClientMsg::History(h))) => { 148 | if history.len() == 0 { 149 | history = h; 150 | } else { 151 | assert_eq!(history, h); 152 | assert!(history.len() != 0); 153 | } 154 | break; 155 | }, 156 | Ok(val) => { 157 | println!("{:?}", val); 158 | assert!(false) 159 | }, 160 | Err(e) => { 161 | println!("{}", e); 162 | assert!(false) 163 | } 164 | } 165 | } 166 | } 167 | }); 168 | h.join().unwrap(); 169 | } 170 | 171 | fn shutdown(nodes: Vec, 172 | service_pid: Pid, 173 | service_tx: CrSender) 174 | { 175 | let envelope = Envelope::new(service_pid, test_pid(nodes[0].id.clone()), Msg::Shutdown, None); 176 | service_tx.send(envelope).unwrap(); 177 | for node in nodes { 178 | node.shutdown(); 179 | } 180 | } 181 | 182 | fn wait_for_connected_cluster(nodes: &Vec, 183 | poller: &mut Poller, 184 | test_rx: &CrReceiver) { 185 | let start = SteadyTime::now(); 186 | let mut stable_count = 0; 187 | while stable_count < nodes.len() { 188 | stable_count = 0; 189 | for node in nodes { 190 | let correlation_id = CorrelationId::pid(test_pid(node.id.clone())); 191 | node.cluster_status(correlation_id).unwrap(); 192 | // We are only polling on the test channel, so we don't need to know what woke the poller 193 | let notifications = poller.wait(5000).unwrap(); 194 | assert_eq!(1, notifications.len()); 195 | let envelope = test_rx.try_recv().unwrap(); 196 | if let Msg::ClusterStatus(ClusterStatus{established, 197 | num_connections, ..}) = envelope.msg 198 | { 199 | // Ensure that we are in a stable state. We have 2 established connections and no 200 | // non-established connections that may cause established ones to disconnect. 
201 | if established.len() == 2 && num_connections == 2 { 202 | println!("Cluster connected in {} ms at {}", 203 | (SteadyTime::now() - start).num_milliseconds(), node.id); 204 | stable_count +=1 ; 205 | } 206 | } 207 | } 208 | } 209 | } 210 | 211 | fn create_replica_pids(nodes: &Vec) -> Vec { 212 | ["replica1", "replica2", "replica3"].iter().zip(nodes).map(|(name, node)| { 213 | Pid { 214 | name: name.to_string(), 215 | group: None, 216 | node: node.id.clone() 217 | } 218 | }).collect() 219 | } 220 | -------------------------------------------------------------------------------- /tests/single_node_chain_replication.rs: -------------------------------------------------------------------------------- 1 | extern crate amy; 2 | extern crate rabble; 3 | #[macro_use] 4 | extern crate assert_matches; 5 | extern crate serde; 6 | 7 | #[macro_use] 8 | extern crate serde_derive; 9 | 10 | mod utils; 11 | 12 | use std::{thread, time}; 13 | use std::net::TcpStream; 14 | use std::str; 15 | use amy::Sender; 16 | 17 | use utils::messages::*; 18 | use utils::replica::Replica; 19 | use utils::api_server; 20 | 21 | use rabble::{ 22 | Pid, 23 | NodeId, 24 | Envelope, 25 | Msg, 26 | Node 27 | }; 28 | use rabble::serialize::{Serialize, MsgpackSerializer}; 29 | 30 | const CLUSTER_SERVER_IP: &'static str = "127.0.0.1:11001"; 31 | const API_SERVER_IP: &'static str = "127.0.0.1:22001"; 32 | 33 | #[test] 34 | fn chain_replication() { 35 | let node_id = NodeId {name: "node1".to_string(), addr: CLUSTER_SERVER_IP.to_string()}; 36 | let test_pid = Pid { name: "test-runner".to_string(), group: None, node: node_id.clone()}; 37 | let (node, mut handles) = rabble::rouse::(node_id, None); 38 | 39 | let pids = create_replica_pids(&node.id); 40 | 41 | let (service_pid, service_tx, service_handle) = api_server::start(node.clone()); 42 | handles.push(service_handle); 43 | 44 | spawn_replicas(&node, &pids); 45 | 46 | run_client_operations(&pids); 47 | 48 | verify_histories(&pids); 49 | 50 | shutdown(node, test_pid, service_pid, service_tx); 51 | 52 | for h in handles { 53 | h.join().unwrap(); 54 | } 55 | 56 | } 57 | 58 | fn shutdown(node: Node, 59 | test_pid: Pid, 60 | service_pid: Pid, 61 | service_tx: Sender>) 62 | { 63 | let shutdown_envelope = Envelope { 64 | to: service_pid, 65 | from: test_pid, 66 | msg: Msg::Shutdown, 67 | correlation_id: None 68 | }; 69 | service_tx.send(shutdown_envelope).unwrap(); 70 | node.shutdown(); 71 | 72 | } 73 | 74 | fn create_replica_pids(node_id: &NodeId) -> Vec { 75 | ["replica1", "replica2", "replica3"].iter().map(|name| { 76 | Pid { 77 | name: name.to_string(), 78 | group: None, 79 | node: node_id.clone() 80 | } 81 | }).collect() 82 | } 83 | 84 | fn spawn_replicas(node: &Node, pids: &Vec) { 85 | // Launch the three replicas participating in chain replication 86 | for i in 0..pids.len() { 87 | let next = if i == pids.len() - 1 { 88 | None 89 | } else { 90 | Some(pids[i + 1].clone()) 91 | }; 92 | 93 | let replica = Box::new(Replica::new(pids[i].clone(), next)); 94 | node.spawn(&pids[i], replica).unwrap(); 95 | } 96 | } 97 | 98 | /// launch 3 clients and send concurrent operations to the head of the chain 99 | fn run_client_operations(pids: &Vec) { 100 | let mut client_handles = Vec::new(); 101 | for i in 0..3 { 102 | let pids = pids.clone(); 103 | let h = thread::spawn(move || { 104 | let mut sock = TcpStream::connect(API_SERVER_IP).unwrap(); 105 | let mut serializer = MsgpackSerializer::new(); 106 | assert_matches!(serializer.write_msgs(&mut sock, 107 | 
Some(&ApiClientMsg::Op(pids[0].clone(), i))), 108 | Ok(true)); 109 | sock.set_nonblocking(true).unwrap(); 110 | loop { 111 | thread::sleep(time::Duration::from_millis(10)); 112 | match serializer.read_msg(&mut sock) { 113 | Ok(None) => (), 114 | Ok(Some(reply)) => { 115 | assert_eq!(ApiClientMsg::OpComplete, reply); 116 | break; 117 | }, 118 | Err(e) => { 119 | println!("{}", e); 120 | assert!(false) 121 | } 122 | } 123 | } 124 | }); 125 | client_handles.push(h); 126 | } 127 | 128 | for h in client_handles { 129 | h.join().unwrap(); 130 | } 131 | } 132 | 133 | /// Verify that after all client operations have gotten replies that the history of operations in 134 | /// each replica is identical. 135 | fn verify_histories(pids: &Vec) { 136 | let pids = pids.clone(); 137 | let h = thread::spawn(move || { 138 | let mut sock = TcpStream::connect(API_SERVER_IP).unwrap(); 139 | sock.set_nonblocking(true).unwrap(); 140 | let mut serializer = MsgpackSerializer::new(); 141 | let mut history = Vec::new(); 142 | for pid in pids { 143 | assert_matches!(serializer.write_msgs(&mut sock, 144 | Some(&ApiClientMsg::GetHistory(pid))), 145 | Ok(true)); 146 | loop { 147 | thread::sleep(time::Duration::from_millis(10)); 148 | match serializer.read_msg(&mut sock) { 149 | Ok(None) => (), 150 | Ok(Some(ApiClientMsg::History(h))) => { 151 | if history.len() == 0 { 152 | history = h; 153 | } else { 154 | assert_eq!(history, h); 155 | assert!(history.len() != 0); 156 | } 157 | break; 158 | }, 159 | Ok(val) => { 160 | println!("{:?}", val); 161 | assert!(false) 162 | }, 163 | Err(e) => { 164 | println!("{}", e); 165 | assert!(false) 166 | } 167 | } 168 | } 169 | } 170 | }); 171 | h.join().unwrap(); 172 | } 173 | -------------------------------------------------------------------------------- /tests/timeout_tests.rs: -------------------------------------------------------------------------------- 1 | extern crate amy; 2 | extern crate rabble; 3 | 4 | #[macro_use] 5 | extern crate assert_matches; 6 | extern crate serde; 7 | 8 | #[macro_use] 9 | extern crate serde_derive; 10 | 11 | extern crate slog; 12 | extern crate slog_stdlog; 13 | extern crate slog_envlogger; 14 | extern crate slog_term; 15 | extern crate log; 16 | extern crate time; 17 | 18 | mod utils; 19 | 20 | use std::{str}; 21 | use std::net::TcpStream; 22 | use std::sync::mpsc; 23 | use amy::Sender; 24 | use time::Duration; 25 | 26 | use utils::messages::*; 27 | use utils::api_server; 28 | use utils::{ 29 | wait_for, 30 | send 31 | }; 32 | 33 | use rabble::{ 34 | Pid, 35 | Process, 36 | Envelope, 37 | Msg, 38 | Node, 39 | NodeId, 40 | CorrelationId 41 | }; 42 | use rabble::serialize::{Serialize, MsgpackSerializer}; 43 | 44 | const CLUSTER_SERVER_IP: &'static str = "127.0.0.1:11001"; 45 | const API_SERVER_IP: &'static str = "127.0.0.1:22001"; 46 | 47 | #[test] 48 | fn connection_timeout() { 49 | let node_id = NodeId {name: "node1".to_string(), addr: CLUSTER_SERVER_IP.to_string()}; 50 | let (node, mut handles) = rabble::rouse::(node_id, None); 51 | 52 | let (service_pid, service_tx, service_handle) = api_server::start(node.clone()); 53 | handles.push(service_handle); 54 | 55 | run_client_operation_against_nonexistant_pid_and_wait_for_timeout(node.id.clone()); 56 | 57 | shutdown(node, service_pid, service_tx); 58 | 59 | for h in handles { 60 | h.join().unwrap(); 61 | } 62 | } 63 | 64 | struct TestProcess { 65 | pid: Pid, 66 | executor_pid: Option, 67 | 68 | /// Don't do this in production!!! 
69 | /// This is only hear to signal to the test that it has received a message. 70 | tx: mpsc::Sender<()> 71 | } 72 | 73 | impl Process<()> for TestProcess { 74 | 75 | fn init(&mut self, executor_pid: Pid) -> Vec> { 76 | self.executor_pid = Some(executor_pid); 77 | // Start a timer with a 100ms timeout and no correlation id. We don't need one 78 | // since there is only one timer in this example 79 | vec![Envelope::new(self.executor_pid.as_ref().unwrap().clone(), 80 | self.pid.clone(), 81 | Msg::StartTimer(100), 82 | None)] 83 | } 84 | 85 | fn handle(&mut self, 86 | msg: Msg<()>, 87 | from: Pid, 88 | correlation_id: Option, 89 | _: &mut Vec>) 90 | { 91 | assert_eq!(from, *self.executor_pid.as_ref().unwrap()); 92 | assert_eq!(msg, Msg::Timeout); 93 | assert_eq!(correlation_id, None); 94 | self.tx.send(()).unwrap(); 95 | } 96 | } 97 | 98 | #[test] 99 | fn process_timeout() { 100 | let node_id = NodeId {name: "node1".to_string(), addr: "127.0.0.1:11002".to_string()}; 101 | let (node, handles) = rabble::rouse::<()>(node_id.clone(), None); 102 | 103 | let pid = Pid { 104 | name: "some-process".to_string(), 105 | group: None, 106 | node: node_id 107 | }; 108 | 109 | let (tx, rx) = mpsc::channel(); 110 | 111 | let process = TestProcess { 112 | pid: pid.clone(), 113 | executor_pid: None, 114 | tx: tx 115 | }; 116 | 117 | node.spawn(&pid, Box::new(process)).unwrap(); 118 | 119 | // Wait for the process to get the timeout 120 | rx.recv().unwrap(); 121 | 122 | node.shutdown(); 123 | for h in handles { 124 | h.join().unwrap(); 125 | } 126 | } 127 | 128 | fn run_client_operation_against_nonexistant_pid_and_wait_for_timeout(node_id: NodeId) { 129 | let pid = Pid {name: "fake-pid".to_string(), group: None, node: node_id}; 130 | let mut sock = TcpStream::connect(API_SERVER_IP).unwrap(); 131 | sock.set_nonblocking(true).unwrap(); 132 | let mut serializer = MsgpackSerializer::new(); 133 | send(&mut sock, &mut serializer, ApiClientMsg::Op(pid, 0)); 134 | assert_eq!(true, wait_for(Duration::seconds(10), || { 135 | if let Ok(Some(ApiClientMsg::Timeout)) = serializer.read_msg(&mut sock) { 136 | return true; 137 | } 138 | false 139 | })); 140 | } 141 | 142 | fn shutdown(node: Node, 143 | service_pid: Pid, 144 | service_tx: Sender>) 145 | { 146 | // A made up pid to represent the test. 
147 | let from = Pid {name: "test-runner".to_string(), group: None, node: node.id.clone()}; 148 | let shutdown_envelope = Envelope { 149 | to: service_pid, 150 | from: from, 151 | msg: Msg::Shutdown, 152 | correlation_id: None 153 | }; 154 | service_tx.send(shutdown_envelope).unwrap(); 155 | node.shutdown(); 156 | } 157 | -------------------------------------------------------------------------------- /tests/utils/api_server.rs: -------------------------------------------------------------------------------- 1 | use std::thread::{self, JoinHandle}; 2 | use amy::Sender; 3 | 4 | use rabble::{ 5 | Pid, 6 | Node, 7 | Envelope, 8 | CorrelationId, 9 | Msg, 10 | TcpServerHandler, 11 | Service, 12 | ConnectionMsg, 13 | ConnectionHandler 14 | }; 15 | 16 | use rabble::serialize::MsgpackSerializer; 17 | 18 | use super::messages::{RabbleUserMsg, ApiClientMsg}; 19 | 20 | #[allow(dead_code)] // Not used in all tests 21 | const API_SERVER_IP: &'static str = "127.0.0.1:22001"; 22 | 23 | #[allow(dead_code)] // Not used in all tests 24 | pub fn start(node: Node) 25 | -> (Pid, Sender>, JoinHandle<()>) 26 | { 27 | let server_pid = Pid { 28 | name: "api-server".to_string(), 29 | group: None, 30 | node: node.id.clone() 31 | }; 32 | 33 | // Start the API tcp server 34 | let handler: TcpServerHandler> = 35 | TcpServerHandler::new(server_pid.clone(), API_SERVER_IP, 5000, None); 36 | let mut service = Service::new(server_pid, node, handler).unwrap(); 37 | let service_tx = service.tx.try_clone().unwrap(); 38 | let service_pid = service.pid.clone(); 39 | let h = thread::spawn(move || { 40 | service.wait(); 41 | }); 42 | (service_pid, service_tx, h) 43 | } 44 | 45 | 46 | pub struct ApiServerConnectionHandler { 47 | pid: Pid, 48 | id: u64, 49 | total_requests: u64 50 | } 51 | 52 | impl ConnectionHandler for ApiServerConnectionHandler { 53 | type Msg = RabbleUserMsg; 54 | type ClientMsg = ApiClientMsg; 55 | 56 | fn new(pid: Pid, id: u64) -> ApiServerConnectionHandler { 57 | ApiServerConnectionHandler { 58 | pid: pid, 59 | id: id, 60 | total_requests: 0 61 | } 62 | } 63 | 64 | fn handle_envelope(&mut self, 65 | envelope: Envelope, 66 | output: &mut Vec>) 67 | { 68 | let Envelope {msg, correlation_id, ..} = envelope; 69 | let correlation_id = correlation_id.unwrap(); 70 | match msg { 71 | Msg::User(RabbleUserMsg::History(h)) => { 72 | output.push(ConnectionMsg::Client(ApiClientMsg::History(h), correlation_id)); 73 | }, 74 | Msg::User(RabbleUserMsg::OpComplete) => { 75 | output.push(ConnectionMsg::Client(ApiClientMsg::OpComplete, correlation_id)); 76 | }, 77 | Msg::Timeout => { 78 | output.push(ConnectionMsg::Client(ApiClientMsg::Timeout, correlation_id)); 79 | }, 80 | _ => unreachable!() 81 | } 82 | } 83 | 84 | fn handle_network_msg(&mut self, 85 | msg: ApiClientMsg, 86 | output: &mut Vec>) 87 | { 88 | match msg { 89 | ApiClientMsg::Op(pid, val) => { 90 | output.push(self.new_envelope(pid, RabbleUserMsg::Op(val))); 91 | }, 92 | ApiClientMsg::GetHistory(pid) => { 93 | output.push(self.new_envelope(pid, RabbleUserMsg::GetHistory)); 94 | } 95 | 96 | // We only handle client requests. Client replies come in as Envelopes and are handled 97 | // in handle_envelope(). 
98 | _ => unreachable!() 99 | } 100 | } 101 | } 102 | 103 | impl ApiServerConnectionHandler { 104 | pub fn new_envelope(&mut self, to: Pid, user_msg: RabbleUserMsg) 105 | -> ConnectionMsg 106 | { 107 | let msg = Msg::User(user_msg); 108 | let correlation_id = CorrelationId::request(self.pid.clone(), self.id, self.total_requests); 109 | self.total_requests += 1; 110 | ConnectionMsg::Envelope(Envelope::new(to, self.pid.clone(), msg, Some(correlation_id))) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /tests/utils/messages.rs: -------------------------------------------------------------------------------- 1 | use rabble::Pid; 2 | 3 | // Msg type parameter for messages sent to processes and services 4 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 5 | pub enum RabbleUserMsg { 6 | Op(usize), // Request 7 | OpComplete, // Reply 8 | 9 | GetHistory, // Request 10 | History(Vec) // Reply 11 | } 12 | 13 | // Messages sent over the API server TCP connections 14 | #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] 15 | pub enum ApiClientMsg { 16 | Op(Pid, usize), 17 | OpComplete, 18 | GetHistory(Pid), 19 | History(Vec), 20 | Timeout 21 | } 22 | 23 | -------------------------------------------------------------------------------- /tests/utils/mod.rs: -------------------------------------------------------------------------------- 1 | extern crate time; 2 | extern crate slog; 3 | extern crate slog_term; 4 | extern crate slog_envlogger; 5 | extern crate slog_stdlog; 6 | 7 | pub mod replica; 8 | pub mod api_server; 9 | pub mod messages; 10 | 11 | use std::thread::{self, JoinHandle}; 12 | use std::net::TcpStream; 13 | use amy::{Poller, Receiver, Sender}; 14 | use self::slog::DrainExt; 15 | use self::time::{SteadyTime, Duration}; 16 | use utils::messages::*; 17 | use rabble::{ 18 | self, 19 | NodeId, 20 | Node, 21 | Envelope, 22 | Pid, 23 | CorrelationId, 24 | Msg 25 | }; 26 | use rabble::serialize::{Serialize, MsgpackSerializer}; 27 | 28 | type CrNode = Node; 29 | type CrReceiver = Receiver>; 30 | type CrSender = Sender>; 31 | 32 | /// Wait for a function to return true 33 | /// 34 | /// After each call of `f()` that returns `false`, sleep for `sleep_time` 35 | /// Returns true if `f()` returns true before the timeout expires 36 | /// Returns false if the runtime of the test exceeds `timeout` 37 | #[allow(dead_code)] // Not used in all tests 38 | pub fn wait_for(timeout: Duration, mut f: F) -> bool 39 | where F: FnMut() -> bool 40 | { 41 | let sleep_time = Duration::milliseconds(10); 42 | let start = SteadyTime::now(); 43 | while let false = f() { 44 | thread::sleep(sleep_time.to_std().unwrap()); 45 | if SteadyTime::now() - start > timeout { 46 | return false; 47 | } 48 | } 49 | true 50 | } 51 | 52 | /// Send a message over a non-blocking socket 53 | /// Wait for it to finish sending or timeout after 5 seconds 54 | /// In practice the first call to serializer.write_msgs should succeed unless the TCP send buffer is 55 | /// tiny. 56 | #[allow(dead_code)] // Not used in all tests 57 | pub fn send(sock: &mut TcpStream, 58 | serializer: &mut MsgpackSerializer, 59 | msg: ApiClientMsg) 60 | { 61 | if let Ok(true) = serializer.write_msgs(sock, Some(&msg)) { 62 | return; 63 | } 64 | // Just busy wait instead of using a poller in this test. 
65 | assert_eq!(true, wait_for(Duration::seconds(5), || { 66 | // We don't know if it's writable, but we want to actually try the write 67 | serializer.set_writable(); 68 | match serializer.write_msgs(sock, None) { 69 | Ok(true) => true, 70 | Ok(false) => false, 71 | Err(e) => { 72 | println!("Failed to write to socket: {}", e); 73 | assert!(false); 74 | unreachable!(); 75 | } 76 | } 77 | })); 78 | } 79 | 80 | 81 | #[allow(dead_code)] // Not used in all tests 82 | pub fn create_node_ids(n: usize) -> Vec { 83 | (1..n + 1).map(|n| { 84 | NodeId { 85 | name: format!("node{}", n), 86 | addr: format!("127.0.0.1:1100{}", n) 87 | } 88 | }).collect() 89 | } 90 | 91 | #[allow(dead_code)] // Not used in all tests 92 | pub fn start_nodes(n: usize) -> (Vec>, Vec>) { 93 | let term = slog_term::streamer().build(); 94 | let drain = slog_envlogger::LogBuilder::new(term) 95 | .filter(None, slog::FilterLevel::Debug).build(); 96 | let root_logger = slog::Logger::root(drain.fuse(), None); 97 | slog_stdlog::set_logger(root_logger.clone()).unwrap(); 98 | create_node_ids(n).into_iter().fold((Vec::new(), Vec::new()), 99 | |(mut nodes, mut handles), node_id| { 100 | let (node, handle_list) = rabble::rouse(node_id, Some(root_logger.clone())); 101 | nodes.push(node); 102 | handles.extend(handle_list); 103 | (nodes, handles) 104 | }) 105 | } 106 | 107 | #[allow(dead_code)] // Not used in all tests 108 | pub fn test_pid(node_id: NodeId) -> Pid { 109 | Pid { 110 | name: "test-runner".to_string(), 111 | group: None, 112 | node: node_id 113 | } 114 | } 115 | 116 | #[allow(dead_code)] // Not used in all tests 117 | pub fn register_test_as_service(poller: &mut Poller, 118 | nodes: &Vec, 119 | test_tx: &CrSender, 120 | test_rx: &CrReceiver) 121 | { 122 | for node in nodes { 123 | let test_pid = test_pid(node.id.clone()); 124 | let correlation_id = CorrelationId::pid(test_pid.clone()); 125 | node.register_service(&test_pid, &test_tx).unwrap(); 126 | // Wait for registration to succeed 127 | loop { 128 | node.cluster_status(correlation_id.clone()).unwrap(); 129 | let notifications = poller.wait(10).unwrap(); 130 | if notifications.len() != 0 { 131 | // We have registered, otherwise we wouldn't have gotten a response 132 | // Let's drain the receiver, because we may have returned from a previous poll 133 | // before the previous ClusterStatus response was sent 134 | while let Ok(envelope) = test_rx.try_recv() { 135 | assert_matches!(envelope.msg, Msg::ClusterStatus(_)); 136 | } 137 | break; 138 | } 139 | } 140 | } 141 | 142 | } 143 | 144 | 145 | -------------------------------------------------------------------------------- /tests/utils/replica.rs: -------------------------------------------------------------------------------- 1 | use rabble::{ 2 | Pid, 3 | Process, 4 | Envelope, 5 | CorrelationId, 6 | Msg 7 | }; 8 | 9 | use super::messages::RabbleUserMsg; 10 | 11 | /// A participant in chain replication 12 | #[allow(dead_code)] // Not used in all tests 13 | pub struct Replica { 14 | pid: Pid, 15 | next: Option, 16 | history: Vec 17 | } 18 | 19 | #[allow(dead_code)] // Not used in all tests 20 | impl Replica { 21 | pub fn new(pid: Pid, next: Option) -> Replica { 22 | Replica { 23 | pid: pid, 24 | next: next, 25 | history: Vec::new() 26 | } 27 | } 28 | } 29 | 30 | impl Process for Replica { 31 | fn handle(&mut self, 32 | msg: Msg, 33 | _from: Pid, 34 | correlation_id: Option, 35 | output: &mut Vec>) 36 | { 37 | let to = correlation_id.as_ref().unwrap().pid.clone(); 38 | let from = self.pid.clone(); 39 | match msg { 40 | 
Msg::User(RabbleUserMsg::Op(val)) => { 41 | let msg = Msg::User(RabbleUserMsg::OpComplete); 42 | let reply = Envelope::new(to, from, msg, correlation_id.clone()); 43 | 44 | // If there is no next pid send the reply to the original caller in the correlation 45 | // id. Otherwise forward to the next process in the chain. 46 | let envelope = self.next.as_ref().map_or(reply, |to| { 47 | let from = self.pid.clone(); 48 | let msg = Msg::User(RabbleUserMsg::Op(val)); 49 | Envelope::new(to.clone(), from, msg, correlation_id) 50 | }); 51 | 52 | self.history.push(val); 53 | output.push(envelope); 54 | }, 55 | Msg::User(RabbleUserMsg::GetHistory) => { 56 | let msg = Msg::User(RabbleUserMsg::History(self.history.clone())); 57 | let envelope = Envelope::new(to, from, msg, correlation_id); 58 | output.push(envelope); 59 | }, 60 | _ => () 61 | } 62 | } 63 | } 64 | 65 | --------------------------------------------------------------------------------