├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── doc └── kmiyc.png ├── include └── serialization.proto ├── run.sh ├── src ├── bin │ ├── rasputinc.rs │ └── rasputind.rs ├── client │ └── mod.rs ├── clock.rs ├── codec.rs ├── etc │ └── loadtest.go ├── lib.rs ├── logging │ └── mod.rs ├── range_bounds.rs ├── serialization.rs └── server │ ├── acked_log.rs │ ├── connset.rs │ ├── mod.rs │ ├── rocksdb.rs │ ├── server.rs │ ├── server_conn.rs │ └── traffic_cop.rs └── test ├── cluster.rs ├── test.rs ├── test_client.rs └── test_paxos.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | *.swp 7 | *.swo 8 | *.swn 9 | 10 | # Executables 11 | *.exe 12 | 13 | # Generated by Cargo 14 | /target/ 15 | 16 | # Test cluster 17 | /_test/ 18 | 19 | # rustfmt 20 | *.bk 21 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [root] 2 | name = "rasputin" 3 | version = "0.1.0" 4 | dependencies = [ 5 | "bytes 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", 6 | "docopt 0.6.68 (registry+https://github.com/rust-lang/crates.io-index)", 7 | "eligos 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 8 | "lazy_static 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", 9 | "log 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 10 | "mio 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 11 | "protobuf 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 12 | "quickcheck 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", 13 | "rand 0.3.10 (registry+https://github.com/rust-lang/crates.io-index)", 14 | "rocksdb 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 15 | "rustc-serialize 0.3.15 
(registry+https://github.com/rust-lang/crates.io-index)", 16 | "threadpool 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", 17 | "time 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)", 18 | "uuid 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", 19 | ] 20 | 21 | [[package]] 22 | name = "advapi32-sys" 23 | version = "0.1.2" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | dependencies = [ 26 | "winapi 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 27 | "winapi-build 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 28 | ] 29 | 30 | [[package]] 31 | name = "aho-corasick" 32 | version = "0.3.0" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | dependencies = [ 35 | "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 36 | ] 37 | 38 | [[package]] 39 | name = "bitflags" 40 | version = "0.1.1" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | 43 | [[package]] 44 | name = "bytes" 45 | version = "0.2.11" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | 48 | [[package]] 49 | name = "clock_ticks" 50 | version = "0.0.5" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | dependencies = [ 53 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 54 | ] 55 | 56 | [[package]] 57 | name = "docopt" 58 | version = "0.6.68" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | dependencies = [ 61 | "regex 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", 62 | "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 63 | "strsim 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 64 | ] 65 | 66 | [[package]] 67 | name = "eligos" 68 | version = "0.1.0" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | dependencies = [ 71 | "bytes 0.2.11 
(registry+https://github.com/rust-lang/crates.io-index)", 72 | "mio 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", 73 | "rand 0.3.10 (registry+https://github.com/rust-lang/crates.io-index)", 74 | ] 75 | 76 | [[package]] 77 | name = "kernel32-sys" 78 | version = "0.1.3" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | dependencies = [ 81 | "winapi 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 82 | "winapi-build 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 83 | ] 84 | 85 | [[package]] 86 | name = "lazy_static" 87 | version = "0.1.14" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | 90 | [[package]] 91 | name = "libc" 92 | version = "0.1.8" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | 95 | [[package]] 96 | name = "log" 97 | version = "0.3.1" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | dependencies = [ 100 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 101 | ] 102 | 103 | [[package]] 104 | name = "memchr" 105 | version = "0.1.3" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | dependencies = [ 108 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 109 | ] 110 | 111 | [[package]] 112 | name = "mio" 113 | version = "0.4.2" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | dependencies = [ 116 | "bytes 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", 117 | "clock_ticks 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)", 118 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 119 | "log 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 120 | "nix 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", 121 | "slab 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "winapi 0.1.23 
(registry+https://github.com/rust-lang/crates.io-index)", 123 | ] 124 | 125 | [[package]] 126 | name = "nix" 127 | version = "0.3.9" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | dependencies = [ 130 | "bitflags 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", 131 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 132 | ] 133 | 134 | [[package]] 135 | name = "protobuf" 136 | version = "1.0.4" 137 | source = "registry+https://github.com/rust-lang/crates.io-index" 138 | 139 | [[package]] 140 | name = "quickcheck" 141 | version = "0.2.21" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | dependencies = [ 144 | "log 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 145 | "rand 0.3.10 (registry+https://github.com/rust-lang/crates.io-index)", 146 | ] 147 | 148 | [[package]] 149 | name = "rand" 150 | version = "0.3.10" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | dependencies = [ 153 | "advapi32-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 154 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 155 | "winapi 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 156 | ] 157 | 158 | [[package]] 159 | name = "regex" 160 | version = "0.1.41" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | dependencies = [ 163 | "aho-corasick 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", 164 | "memchr 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 165 | "regex-syntax 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 166 | ] 167 | 168 | [[package]] 169 | name = "regex-syntax" 170 | version = "0.2.1" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | 173 | [[package]] 174 | name = "rocksdb" 175 | version = "0.1.1" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | dependencies = 
[ 178 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 179 | ] 180 | 181 | [[package]] 182 | name = "rustc-serialize" 183 | version = "0.3.15" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | 186 | [[package]] 187 | name = "slab" 188 | version = "0.1.2" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | 191 | [[package]] 192 | name = "strsim" 193 | version = "0.3.0" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | 196 | [[package]] 197 | name = "threadpool" 198 | version = "0.1.4" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | 201 | [[package]] 202 | name = "time" 203 | version = "0.1.31" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | dependencies = [ 206 | "kernel32-sys 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", 207 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 208 | "winapi 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 209 | ] 210 | 211 | [[package]] 212 | name = "uuid" 213 | version = "0.1.17" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | dependencies = [ 216 | "rand 0.3.10 (registry+https://github.com/rust-lang/crates.io-index)", 217 | "rustc-serialize 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)", 218 | ] 219 | 220 | [[package]] 221 | name = "winapi" 222 | version = "0.1.23" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | dependencies = [ 225 | "libc 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 226 | ] 227 | 228 | [[package]] 229 | name = "winapi" 230 | version = "0.2.1" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | 233 | [[package]] 234 | name = "winapi-build" 235 | version = "0.1.0" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | 238 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | 3 | name = "rasputin" 4 | description = "Hard to kill transactional distributed database" 5 | version = "0.2.0" 6 | authors = [ 7 | "Tyler Neely ", 8 | "Steve Salevan " 9 | ] 10 | license = "Apache-2.0" 11 | homepage = "https://github.com/the-tetanus-clinic/rasputin" 12 | keywords = ["database", "HA", "transactions", "distributed-systems", "paxos"] 13 | 14 | [[test]] 15 | 16 | name = "test" 17 | path = "test/test.rs" 18 | 19 | [dependencies] 20 | bytes = "0.2.11" 21 | docopt = "0.6.66" 22 | lazy_static="0.1.14" 23 | log = "0.3.1" 24 | mio = "0.4.2" 25 | rand = "0.3" 26 | rocksdb = "~0.1.1" 27 | rustc-serialize = "0.3.15" 28 | time = "0.1" 29 | uuid = "0.1" 30 | protobuf = "1.0.16" 31 | threadpool = "0.1.4" 32 | 33 | [dev-dependencies] 34 | quickcheck = "*" 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rasputin DB :globe_with_meridians: 2 | 3 | (Significant work is currently happening in the `tyler_ranges` branch) 4 | 5 | flexible linearizable distributed store 6 | 7 | triumvirs: operational clarity, performance and composability 8 | 9 | currently implemented: linearized KV set/get/cas/del. client code is happy-path only, so it's only fit for playing around with at this point! 10 | 11 | current reasons why you don't want to use this beyond playing with it: 12 | 13 | 1. Mostly unimplemented. We don't have support for automatic resharding, real transactions or collection types other than KV yet. These are still in the planning phase. 14 | 1. Possibly incorrect. We have not yet proven the correctness of the core consensus algorithm. We may be able to adapt the Raft Coq proof to this end, as we are essentially replacing Raft's preemptible leadership with a non-preempting lease to improve throughput in the presence of partial partitions. 
15 | 1. Inefficient. The write path involves a TON of copying. We are in the process of designing a much more efficient buffer management system. 16 | 1. Buggy. Client code is a complete hack that only occasionally works. We have a simulator in place for teasing out bugs in the state machine, but we haven't used it for simulating common datacenter conditions like partitions, delayed message arrival, node restarts/shutdowns/pauses, etc... 17 | 1. Undocumented. 18 | 1. Unpopular. No community and no production users (or, at least I hope nobody is using it yet!). 19 | 20 | ## Running 21 | 22 | ###### Run a test cluster 23 | 24 | ``` 25 | cargo build 26 | ./run.sh 27 | tail -f _rasputin_test/*log 28 | ``` 29 | 30 | ###### Run an individual server 31 | 32 | ``` 33 | target/debug/rasputind \ 34 | --peer-port=7777 \ 35 | --cli-port=8888 \ 36 | --seed-peers="127.0.0.1:7777" \ 37 | --storage-dir=/var/lib/rasputin/ \ 38 | --logfile=/var/log/rasputin.log 39 | ``` 40 | 41 | ###### Hit the cluster with a remote client! 
42 | 43 | Cargo.toml: 44 | 45 | ``` 46 | [dependencies] 47 | rasputin = "0.1.0" 48 | ``` 49 | 50 | Code: 51 | ```rust 52 | extern crate rasputin; 53 | 54 | fn main() { 55 | let peers = vec!["127.0.0.1:8888".parse().unwrap()]; 56 | let nthreads = 1; 57 | let mut cli = rasputin::Client::new(peers, nthreads); 58 | 59 | cli.set(b"k1", b"v1").unwrap(); 60 | assert!(cli.get(b"k1").unwrap().get_value() == b"v1"); 61 | 62 | // CAS returns the current value, and sets the success flag accordingly 63 | assert!(cli.cas(b"k1", b"v1", b"v12").unwrap().get_value() == b"v12"); 64 | assert!(cli.cas(b"k1", b"vNever", b"vNever2").unwrap().get_value() == b"v12"); 65 | assert!(cli.cas(b"k1", b"vNever", b"vNever2").unwrap().get_success() == false); 66 | assert!(cli.cas(b"k1", b"v12", b"v13").unwrap().get_value() == b"v13"); 67 | 68 | // deletes return the last value 69 | assert!(cli.del(b"k1").unwrap().get_value() == b"v13"); 70 | assert!(cli.get(b"k1").unwrap().get_success() == false); 71 | } 72 | ``` 73 | 74 | ## Planned Work 75 | 76 | ###### automatic lexicographic resharding 77 | 78 | Rasputin will utilize shard size and request density metrics to facilitate intelligent splitting. 79 | 80 | ###### several simple persistent collection types 81 | 82 | 1. kv: backed by RocksDB 83 | 2. log: Kafka-like sequential segment files 84 | 3. object: files on system VFS 85 | 86 | ###### interest semantics 87 | 88 | * subscribe: in-order mutation stream 89 | * watch: at most once mutation notification 90 | 91 | ###### replication modes (per-collection) 92 | 93 | 1. consensus: mutations block on replication to a quorum 94 | 2. async: mutations return quickly, and are replicated later 95 | 96 | ###### timeseries primitives 97 | 98 | * logarithmically bucketed histograms for efficient aggregation and consumption of extremely high velocity metrics, a la [loghisto](https://github.com/spacejam/loghisto). 
99 | 100 | ## roadmap 101 | - [x] mio event loops 102 | - [x] leader election 103 | - [x] rocksdb persistence layer 104 | - [x] log replication 105 | - [x] multipaxos consensus 106 | - [x] simple KV client operations 107 | - [ ] reconfigurable membership 108 | - [ ] range splitting 109 | - [ ] mesos framework 110 | - [ ] c/jvm/python/ruby/go client libs 111 | 112 | ## Appendix: The Harpoon Consensus Algorithm 113 | 114 | Because Rasputin aims to be as general purpose of a replication mechanism as possible, it needs to be resilient against partitions. We aim to reuse the parts of Raft that work for this as much as we can, and replace the leader election mechanism with a lease-based one that does not preempt in the presence of partial partitions. This obviously needs to be tested extensively, and to that end a comprehensive simulator is being built for testing the state machine (see test/cluster.rs), and fault injection tooling is being built for inducing realistic datacenter conditions on a non-simulated cluster. 115 | 116 | Raft is vulnerable to rapid leader churn when a partial partition exists between the leader and any other node. The partially partitioned node will fire its leader election timer and receive quorum. Because the old leader can't talk to this new leader, it will do the same. Leadership bounces a lot and we have suboptimal throughput. Harpoon is essentially just Raft with a modified election mechanism: candidates and leaders request leases from all peers, extend leadership if they reach quorum, and abdicate if they do not reach a quorum of successful extension request responses by the end of their lease. This prevents leadership churn in scenarios where there is a partial partition, which is common over the open internet, for example. 117 | 118 | Harpoon has not yet been formally verified, but eventually we will adapt the Raft Coq proof for it. 
119 | 120 | -------------------------------------------------------------------------------- /doc/kmiyc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spacejam/rasputin/31249108bc073d2212f9812533a810b6652e2ebd/doc/kmiyc.png -------------------------------------------------------------------------------- /include/serialization.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package rasputin; 4 | 5 | // 6 | // Client <-> rasputin server messages 7 | // 8 | message SetReq { 9 | required bytes key = 1; 10 | required bytes value = 2; 11 | } 12 | 13 | message SetRes { 14 | required bool success = 1; 15 | required uint64 txid = 2; 16 | optional string err = 3; 17 | } 18 | 19 | message GetReq { 20 | required bytes key = 1; 21 | } 22 | 23 | message GetRes { 24 | required bool success = 1; 25 | required uint64 txid = 2; 26 | optional bytes value = 3; 27 | optional string err = 4; 28 | } 29 | 30 | message CASReq { 31 | required bytes key = 1; 32 | optional bytes new_value = 2; 33 | optional bytes old_value = 3; 34 | } 35 | 36 | message CASRes { 37 | required bool success = 1; 38 | required uint64 txid = 2; 39 | optional bytes value = 3; 40 | optional string err = 4; 41 | } 42 | 43 | message DelReq { 44 | required bytes key = 1; 45 | } 46 | 47 | message DelRes { 48 | required bool success = 1; 49 | required uint64 txid = 2; 50 | required bytes value = 3; 51 | optional string err = 4; 52 | } 53 | 54 | message WatchReq { 55 | required bytes key = 1; 56 | required uint64 last_txid = 2; 57 | required bool recursive = 3; 58 | required bool historical = 4; 59 | } 60 | 61 | message WatchRes { 62 | required bool success = 1; 63 | repeated Mutation history = 2; 64 | optional string err = 3; 65 | } 66 | 67 | message RedirectRes { 68 | required bool success = 1; 69 | optional string address = 2; 70 | optional string err = 3; 71 | } 72 | 73 | // 
datatypes 74 | enum MutationType { 75 | KVSET = 1; 76 | KVCAS = 2; 77 | KVDEL = 3; 78 | } 79 | 80 | message Mutation { 81 | required MutationType type = 1; 82 | required Version version = 2; 83 | required bytes key = 3; 84 | optional bytes value = 4; 85 | optional bytes old_value = 5; 86 | } 87 | 88 | message Version { 89 | required uint64 txid = 1; 90 | required uint64 term = 2; 91 | } 92 | 93 | // client top-level API 94 | message CliReq { 95 | required uint64 req_id = 1; 96 | optional GetReq get = 2; 97 | optional SetReq set = 3; 98 | optional CASReq cas = 4; 99 | optional DelReq del = 5; 100 | optional WatchReq watch = 6; 101 | } 102 | 103 | message CliRes { 104 | required uint64 req_id = 1; 105 | optional GetRes get = 2; 106 | optional SetRes set = 3; 107 | optional CASRes cas = 4; 108 | optional DelRes del = 5; 109 | optional WatchRes watch = 6; 110 | optional RedirectRes redirect = 7; 111 | } 112 | 113 | // 114 | // Leadership 115 | // 116 | message VoteReq { 117 | required uint64 term = 1; 118 | required uint64 last_learned_term = 2; 119 | required uint64 last_learned_txid = 3; 120 | required uint64 last_accepted_term = 4; 121 | required uint64 last_accepted_txid = 5; 122 | } 123 | 124 | message VoteRes { 125 | required bool success = 1; 126 | required uint64 term = 2; 127 | } 128 | 129 | // 130 | // Replication 131 | // 132 | message Append { 133 | required uint64 from_txid = 1; 134 | required uint64 from_term = 2; 135 | repeated Mutation batch = 3; 136 | required uint64 last_learned_txid = 4; 137 | } 138 | 139 | message AppendRes { 140 | required bool accepted = 1; 141 | optional uint64 last_accepted_txid = 2; 142 | optional uint64 last_accepted_term = 3; 143 | } 144 | 145 | // server<->server top-level api 146 | message PeerMsg { 147 | required string srvid = 1; 148 | optional VoteReq vote_req = 2; 149 | optional VoteRes vote_res = 3; 150 | optional Append append = 4; 151 | optional AppendRes append_res = 5; 152 | } 153 | 
-------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | peers="127.0.0.1:7770,127.0.0.1:7771,127.0.0.1:7772,127.0.0.1:7773,127.0.0.1:7774" 3 | for i in {0..4}; do 4 | mkdir -p _rasputin_test/$i/data 5 | RUST_BACKTRACE=1 target/debug/rasputind \ 6 | --peer-port=777$i \ 7 | --cli-port=888$i \ 8 | --seed-peers=$peers \ 9 | --storage-dir=_rasputin_test/$i/data \ 10 | --logfile=_rasputin_test/$i.log & 11 | done 12 | -------------------------------------------------------------------------------- /src/bin/rasputinc.rs: -------------------------------------------------------------------------------- 1 | extern crate rustc_serialize; 2 | extern crate docopt; 3 | #[macro_use] extern crate log; 4 | extern crate rasputin; 5 | 6 | use std::net::SocketAddr; 7 | use std::process; 8 | 9 | use rasputin::Client; 10 | use docopt::Docopt; 11 | 12 | static USAGE: &'static str = " 13 | rasputinc - client for rasputin. 14 | 15 | This program is the Rasputin DB command line client. 16 | 17 | Usage: 18 | rasputinc --help 19 | rasputinc [--peers=] [--get=] [--set=,] [--cas=,,] [--del=] 20 | 21 | Options: 22 | --help Show this help message. 23 | --peers= List of comma-delimited peers, e.g: 24 | foo.baz.com:8888,bar.baz.com:8888 25 | --get= Get the current value for , if set. 26 | --set= Set the key to . 27 | --cas= Attempt an atomic compare and swap. 28 | --del= Delete the current value for , if set. 
29 | "; 30 | 31 | fn main() { 32 | let args: Args = Docopt::new(USAGE) 33 | .and_then(|d| d.decode()) 34 | .unwrap_or_else(|e| e.exit()); 35 | 36 | let peers: Vec = args.flag_peers.unwrap_or("127.0.0.1:8888".to_string()) 37 | .split(",") 38 | .map(|s| s.parse().unwrap()) 39 | .collect(); 40 | 41 | let nthreads = 1; 42 | let mut cli = Client::new(peers, nthreads); 43 | 44 | args.flag_set.map(|kv: String| { 45 | let kvs: Vec<&str> = kv.splitn(2, ",").take(2).collect(); 46 | if kvs.len() != 2 { 47 | println!("{}", USAGE); 48 | process::exit(1); 49 | } 50 | let (k, v) = (kvs[0], kvs[1]); 51 | cli.set(k.as_bytes(), v.as_bytes()).unwrap(); 52 | }); 53 | } 54 | 55 | #[derive(Debug, RustcDecodable)] 56 | struct Args { 57 | flag_help: bool, 58 | flag_peers: Option, 59 | flag_set: Option, 60 | flag_get: Option, 61 | flag_cas: Option, 62 | flag_del: Option, 63 | } 64 | -------------------------------------------------------------------------------- /src/bin/rasputind.rs: -------------------------------------------------------------------------------- 1 | extern crate rustc_serialize; 2 | extern crate mio; 3 | extern crate docopt; 4 | #[macro_use] 5 | extern crate log; 6 | extern crate rasputin; 7 | 8 | use std::sync::mpsc::SendError; 9 | 10 | use log::LogLevel; 11 | use docopt::Docopt; 12 | 13 | use rasputin::server::{Server, Envelope}; 14 | use rasputin::RealClock; 15 | 16 | static USAGE: &'static str = " 17 | rasputin - HA transactional store with a focus on usability, stability and performance. 18 | 19 | This program is the Rasputin DB server process. 20 | 21 | Usage: 22 | rasputind --help 23 | rasputind [--cli-port=] [--peer-port=] [--seed-peers=] [--logfile=] [--storage-dir=] 24 | 25 | Options: 26 | --help Show this help message. 27 | --cli-port= Listening port for communication between servers. 28 | --peer-port= Listening port for communication with clients. 
29 | --seed-peers= List of comma-delimited initial peers, e.g: 30 | foo.baz.com:7777,bar.baz.com:7777 31 | --logfile= File to log output to instead of stdout. 32 | --storage-dir= Directory to store the persisted data in; defaults to /var/lib/rasputin 33 | "; 34 | 35 | fn main() { 36 | let args: Args = Docopt::new(USAGE) 37 | .and_then(|d| d.decode()) 38 | .unwrap_or_else(|e| e.exit()); 39 | 40 | rasputin::logging::init_logger(args.flag_logfile, LogLevel::Info).unwrap(); 41 | print_banner(); 42 | 43 | let peer_port: u16 = match args.flag_peer_port { 44 | Some(p) => p, 45 | None => 7770, 46 | }; 47 | 48 | let cli_port: u16 = match args.flag_cli_port { 49 | Some(p) => p, 50 | None => 8880, 51 | }; 52 | 53 | let storage_dir: String = match args.flag_storage_dir { 54 | Some(d) => d, 55 | None => "/var/lib/rasputin".to_string(), 56 | }; 57 | 58 | let seed_peers: Vec = args.flag_seed_peers 59 | .split(",") 60 | .map(|s| s.to_string()) 61 | .filter(|s| s != "") 62 | .collect(); 63 | 64 | Server::>> 65 | ::run(peer_port, cli_port, storage_dir, seed_peers); 66 | } 67 | 68 | #[derive(Debug, RustcDecodable)] 69 | struct Args { 70 | flag_help: bool, 71 | flag_cli_port: Option, 72 | flag_peer_port: Option, 73 | flag_seed_peers: String, 74 | flag_logfile: Option, 75 | flag_storage_dir: Option, 76 | } 77 | 78 | fn print_banner() { 79 | info!(" 80 | )xxxxx[:::::::::> 81 | ______ _______ _______ _____ _ _ _______ _____ __ _ 82 | |_____/ |_____| |______ |_____] | | | | | \\ | 83 | | \\_ | | ______| | |_____| | __|__ | \\_|"); 84 | } 85 | -------------------------------------------------------------------------------- /src/client/mod.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::io::{self, Error, ErrorKind}; 3 | use std::net::SocketAddr; 4 | use std::sync::mpsc::channel; 5 | 6 | use bytes::{Buf, ByteBuf}; 7 | use threadpool::ThreadPool; 8 | use protobuf::{self, Message}; 9 | use mio::{TryRead, 
TryWrite}; 10 | use mio::tcp::TcpStream; 11 | 12 | use {CliReq, CliRes, GetReq, GetRes, RangeBounds, RedirectRes, SetReq, 13 | SetRes, Version, CASReq, CASRes, DelReq, DelRes}; 14 | use codec::{self, Codec, Framed}; 15 | 16 | pub struct Client { 17 | servers: Vec, 18 | ranges: BTreeMap, 19 | pool: ThreadPool, 20 | req_counter: u64, 21 | } 22 | 23 | impl Client { 24 | pub fn new(servers: Vec, nthreads: usize) -> Client { 25 | Client { 26 | servers: servers, 27 | ranges: BTreeMap::new(), 28 | pool: ThreadPool::new(nthreads), 29 | req_counter: 0, 30 | } 31 | } 32 | 33 | fn get_id(&mut self) -> u64 { 34 | self.req_counter += 1; 35 | self.req_counter 36 | } 37 | 38 | pub fn set<'a>( 39 | &mut self, 40 | key: &'a [u8], 41 | value: &'a [u8] 42 | ) -> io::Result { 43 | 44 | let mut set = SetReq::new(); 45 | set.set_key(key.to_vec()); 46 | set.set_value(value.to_vec()); 47 | let mut req = CliReq::new(); 48 | req.set_set(set); 49 | req.set_req_id(self.get_id()); 50 | 51 | self.req(key.to_vec(), req).map(|cli_res| { 52 | let set_res = cli_res.get_set(); 53 | debug!("got response success: {} txid: {} err: {}", 54 | set_res.get_success(), 55 | set_res.get_txid(), 56 | set_res.get_err()); 57 | cli_res.get_set().clone() 58 | }) 59 | } 60 | 61 | pub fn get<'a>( 62 | &mut self, 63 | key: &'a [u8], 64 | ) -> io::Result { 65 | 66 | let mut get = GetReq::new(); 67 | get.set_key(key.to_vec()); 68 | let mut req = CliReq::new(); 69 | req.set_get(get); 70 | req.set_req_id(self.get_id()); 71 | 72 | self.req(key.to_vec(), req).map(|cli_res| { 73 | let get_res = cli_res.get_get(); 74 | debug!("got response success: {} txid: {} err: {}", 75 | get_res.get_success(), 76 | get_res.get_txid(), 77 | get_res.get_err()); 78 | cli_res.get_get().clone() 79 | }) 80 | } 81 | 82 | pub fn cas<'a>( 83 | &mut self, 84 | key: &'a [u8], 85 | old_value: &'a [u8], 86 | new_value: &'a [u8] 87 | ) -> io::Result { 88 | 89 | let mut cas = CASReq::new(); 90 | cas.set_key(key.to_vec()); 91 | 
cas.set_old_value(old_value.to_vec()); 92 | cas.set_new_value(new_value.to_vec()); 93 | let mut req = CliReq::new(); 94 | req.set_cas(cas); 95 | req.set_req_id(self.get_id()); 96 | 97 | self.req(key.to_vec(), req).map(|cli_res| { 98 | let cas_res = cli_res.get_cas(); 99 | debug!("got response success: {} txid: {} err: {}", 100 | cas_res.get_success(), 101 | cas_res.get_txid(), 102 | cas_res.get_err()); 103 | cli_res.get_cas().clone() 104 | }) 105 | } 106 | 107 | pub fn del<'a>( 108 | &mut self, 109 | key: &'a [u8], 110 | ) -> io::Result { 111 | 112 | let mut del = DelReq::new(); 113 | del.set_key(key.to_vec()); 114 | let mut req = CliReq::new(); 115 | req.set_del(del); 116 | req.set_req_id(self.get_id()); 117 | 118 | self.req(key.to_vec(), req).map(|cli_res| { 119 | let del_res = cli_res.get_del(); 120 | debug!("got response success: {} txid: {} err: {}", 121 | del_res.get_success(), 122 | del_res.get_txid(), 123 | del_res.get_err()); 124 | cli_res.get_del().clone() 125 | }) 126 | } 127 | 128 | fn req(&mut self, key: Vec, req: CliReq) -> io::Result { 129 | // send to a peer, they'll redirect us if we're wrong 130 | for peer in self.servers.iter() { 131 | debug!("trying peer {:?}", peer); 132 | let mut stream_attempt = TcpStream::connect(&peer); 133 | if stream_attempt.is_err() { 134 | continue; 135 | } 136 | 137 | let mut stream = stream_attempt.unwrap(); 138 | let mut codec = Framed::new(); 139 | let mut msg = 140 | codec.encode(ByteBuf::from_slice(&*req.write_to_bytes() 141 | .unwrap())); 142 | 143 | if send_to(&mut stream, &mut msg).is_err() { 144 | debug!("could not send"); 145 | continue; 146 | } 147 | match recv_into(&mut stream, &mut codec) { 148 | Ok(res_buf) => { 149 | let res: &[u8] = res_buf.bytes(); 150 | let cli_res: CliRes = protobuf::parse_from_bytes(res) 151 | .unwrap(); 152 | if cli_res.has_redirect() { 153 | debug!("we got redirect to {}!", 154 | cli_res.get_redirect().get_address()); 155 | // TODO(tyler) try redirected host next 156 | continue; 
157 | } 158 | return Ok(cli_res); 159 | } 160 | Err(e) => { 161 | debug!("got err on recv_into: {}", e); 162 | continue; 163 | } 164 | } 165 | } 166 | Err(Error::new(ErrorKind::Other, "unable to reach any servers!")) 167 | } 168 | } 169 | 170 | fn send_to(stream: &mut TcpStream, buf: &mut ByteBuf) -> io::Result<()> { 171 | loop { 172 | match stream.try_write_buf(buf) { 173 | Ok(None) => { 174 | continue; 175 | } 176 | Ok(Some(r)) => { 177 | if buf.remaining() == 0 { 178 | return Ok(()); 179 | } 180 | } 181 | Err(e) => { 182 | match e.raw_os_error() { 183 | Some(32) => { 184 | debug!("client disconnected"); 185 | } 186 | Some(e) => 187 | debug!("not implemented; client os err={:?}", e), 188 | _ => debug!("not implemented; client err={:?}", e), 189 | }; 190 | // Don't reregister. 191 | return Err(e); 192 | } 193 | } 194 | } 195 | } 196 | 197 | fn recv_into(stream: &mut TcpStream, 198 | codec: &mut Codec) 199 | -> io::Result { 200 | loop { 201 | let mut res_buf = ByteBuf::mut_with_capacity(1024); 202 | match stream.try_read_buf(&mut res_buf) { 203 | Ok(None) => { 204 | //debug!("got readable, but can't read from the socket"); 205 | } 206 | Ok(Some(r)) => { 207 | //debug!("CONN : we read {} bytes!", r); 208 | } 209 | Err(e) => { 210 | debug!("not implemented; client err={:?}", e); 211 | } 212 | } 213 | let mut r: Vec = codec.decode(&mut res_buf.flip()); 214 | if r.len() == 1 { 215 | let res_buf = r.pop().unwrap(); 216 | return Ok(res_buf) 217 | } 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/clock.rs: -------------------------------------------------------------------------------- 1 | use std::sync::RwLock; 2 | use std::thread; 3 | 4 | use time; 5 | 6 | pub trait Clock { 7 | fn now(&self) -> time::Timespec; 8 | fn sleep_ms(&self, ms: u32); 9 | } 10 | 11 | pub struct RealClock; 12 | 13 | unsafe impl Sync for RealClock{} 14 | 15 | impl Clock for RealClock { 16 | fn now(&self) -> time::Timespec { 17 | 
time::now().to_timespec() 18 | } 19 | 20 | fn sleep_ms(&self, ms: u32) { 21 | thread::sleep_ms(ms) 22 | } 23 | } 24 | 25 | pub struct TestClock { 26 | inner: RwLock, 27 | } 28 | 29 | impl TestClock { 30 | pub fn new() -> TestClock { 31 | TestClock { inner: RwLock::new(time::Timespec { sec: 0, nsec: 0 }) } 32 | } 33 | } 34 | 35 | impl Clock for TestClock { 36 | fn now(&self) -> time::Timespec { 37 | let inner = self.inner.read().unwrap(); 38 | *inner 39 | } 40 | 41 | fn sleep_ms(&self, ms: u32) { 42 | let mut inner = self.inner.write().unwrap(); 43 | let ns = (ms % 1e6 as u32) * 1e6 as u32; 44 | inner.nsec += ns as i32; 45 | if inner.nsec > 1e9 as i32 { 46 | inner.sec += (inner.nsec / 1e9 as i32) as i64; 47 | inner.nsec = (inner.nsec % 1e9 as i32) as i32; 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/codec.rs: -------------------------------------------------------------------------------- 1 | use std::ops::Add; 2 | 3 | use bytes::{Buf, ByteBuf, MutBuf, MutByteBuf, alloc}; 4 | use mio::{TryRead, TryWrite}; 5 | 6 | pub trait Codec 7 | { 8 | fn decode(&mut self, buf: &mut In) -> Vec; 9 | fn encode(&self, a: Out) -> In; 10 | } 11 | 12 | pub struct CodecStack { 13 | left: Box>, 14 | right: Box>, 15 | } 16 | 17 | impl Codec for CodecStack { 18 | fn decode(&mut self, buf: &mut In) -> Vec { 19 | self.left 20 | .decode(buf) 21 | .iter_mut() 22 | .flat_map(|mut d| self.right.decode(&mut d)) 23 | .collect() 24 | } 25 | 26 | fn encode(&self, out: Out) -> In { 27 | self.left.encode(self.right.encode(out)) 28 | } 29 | } 30 | 31 | pub struct Framed { 32 | sz_buf: MutByteBuf, 33 | msg: Option, 34 | } 35 | 36 | impl Framed { 37 | pub fn new() -> Framed { 38 | Framed { 39 | sz_buf: ByteBuf::mut_with_capacity(4), 40 | msg: None, 41 | } 42 | } 43 | } 44 | 45 | impl Codec for Framed { 46 | 47 | fn decode(&mut self, buf: &mut ByteBuf) -> Vec { 48 | let mut res = vec![]; 49 | loop { 50 | // read size if we don't have a 
message yet
            if self.msg.is_none() {
                // pull up to 4 header bytes into sz_buf; result intentionally
                // unchecked — progress is judged by sz_buf.remaining() below
                let sz_read = buf.try_read_buf(&mut self.sz_buf);
                // if we've read 4 bytes for the size, create a msg
                if self.sz_buf.remaining() != 0 {
                    // partial header: wait for more input
                    break;
                }

                // header complete: decode the big-endian payload length
                let sz_buf = self.sz_buf.bytes();
                let size = array_to_usize([sz_buf[0], sz_buf[1], sz_buf[2],
                                           sz_buf[3]]);
                self.msg = unsafe {
                    // manually create bytebuf so we can have exact cap and lim
                    Some(ByteBuf::from_mem_ref(alloc::heap(size.next_power_of_two()),
                                               size as u32, // cap
                                               0, // pos
                                               size as u32 /* lim */)
                             .flip())
                };
            }

            if self.msg.is_none() {
                break;
            }

            let mut msg = self.msg.take().unwrap();

            // read actual message
            match buf.try_read_buf(&mut msg) {
                Ok(Some(read)) => {
                    // if we're done, return our Item
                    if msg.remaining() == 0 {
                        // get ready to read a new size
                        self.sz_buf.clear();
                        // return the message
                        res.push(msg.flip())
                    } else {
                        // partial body: stash it and wait for more input
                        self.msg = Some(msg);
                        break
                    }
                }
                // no bytes available (or error): stop decoding for now
                _ => break,
            }
        }
        res
    }

    /// Frames `item` by prefixing its length as a 4-byte big-endian header.
    fn encode(&self, item: ByteBuf) -> ByteBuf {
        let b = item.bytes();
        let mut res = ByteBuf::mut_with_capacity(4 + b.len());
        assert!(res.write_slice(&usize_to_array(b.len())) == 4);
        assert!(res.write_slice(b) == b.len());
        res.flip()
    }
}

/// Encodes the low 32 bits of `u` as 4 big-endian bytes.
/// NOTE(review): silently truncates values above u32::MAX on 64-bit
/// platforms — callers are expected to frame messages smaller than 4 GiB.
pub fn usize_to_array(u: usize) -> [u8; 4] {
    [(u >> 24) as u8, (u >> 16) as u8, (u >> 8) as u8, u as u8]
}

/// Decodes 4 big-endian bytes into a usize (inverse of `usize_to_array`
/// for values that fit in 32 bits).
pub fn array_to_usize(ip: [u8; 4]) -> usize {
    ((ip[0] as usize) << 24) as usize + ((ip[1] as usize) << 16) as usize +
    ((ip[2] as usize) << 8) as usize + (ip[3] as usize)
}

#[cfg(test)]
mod tests {
    extern crate quickcheck;
    use rand::{Rng, thread_rng};

    use codec;
    use codec::Codec;
    use bytes::{Buf, ByteBuf, MutByteBuf};

    // round-trip property: decoding an encoded header recovers the value
    fn array_prop(u: usize) -> bool {
codec::array_to_usize(codec::usize_to_array(u)) == u 126 | } 127 | 128 | #[test] 129 | fn test_usize_to_array_to_usize() { 130 | quickcheck::quickcheck(array_prop as fn(usize) -> bool); 131 | let ip = [250, 1, 2, 3]; 132 | assert!(codec::usize_to_array(codec::array_to_usize(ip)) == ip); 133 | } 134 | 135 | fn framed_prop(sz: usize) -> bool { 136 | if sz == 0 { 137 | // TODO(tyler) currently, feeding an empty slice to 138 | // ByteBuf::from_slice causes a segfault... 139 | return true; 140 | } 141 | let mut rng = thread_rng(); 142 | let mut v: Vec = rng.gen_iter::().take(sz).collect(); 143 | let mut c = codec::Framed::new(); 144 | let mut bytes = ByteBuf::from_slice(&*v); 145 | let mut encoded = c.encode(bytes); 146 | c.decode(&mut encoded).len() == 1 147 | } 148 | 149 | #[test] 150 | fn test_framed_codec() { 151 | quickcheck::quickcheck(framed_prop as fn(usize) -> bool); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/etc/loadtest.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "net" 7 | "runtime" 8 | 9 | "github.com/spacejam/loghisto" 10 | ) 11 | 12 | func benchmark(conn net.Conn) { 13 | fmt.Fprintf(conn, "\x00\x00\x00\x03yo\n") 14 | r, err := bufio.NewReader(conn).ReadString('\n') 15 | if err != nil { 16 | fmt.Errorf("could not read response: %v", err) 17 | return 18 | } 19 | if r != "\x00\x00\x00\x03yo\n" { 20 | fmt.Println("bad response") 21 | } 22 | } 23 | 24 | func main() { 25 | numCPU := runtime.NumCPU() 26 | runtime.GOMAXPROCS(numCPU) 27 | 28 | fire := make(chan struct{}) 29 | for i := 0; i < 8; i++ { 30 | go func() { 31 | conn, err := net.Dial("tcp", "localhost:8880") 32 | if err != nil { 33 | fmt.Errorf("could not connect: %v", err) 34 | return 35 | } 36 | for { 37 | <-fire 38 | benchmark(conn) 39 | } 40 | }() 41 | } 42 | 43 | desiredConcurrency := uint(10) 44 | 
loghisto.PrintBenchmark("benchmark1234", desiredConcurrency, func() { fire <- struct{}{} }) 45 | } 46 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![crate_id = "rasputin"] 2 | #![crate_type = "lib"] 3 | 4 | pub use serialization::{Append, AppendRes, CASReq, CASRes, CliReq, CliRes, 5 | GetReq, GetRes, Mutation, MutationType, PeerMsg, 6 | RedirectRes, SetReq, SetRes, Version, VoteReq, VoteRes, 7 | WatchReq, WatchRes, DelReq, DelRes}; 8 | 9 | pub use codec::{Codec, Framed}; 10 | 11 | pub use clock::{Clock, RealClock, TestClock}; 12 | 13 | pub use range_bounds::RangeBounds; 14 | 15 | pub use client::Client; 16 | 17 | pub mod client; 18 | pub mod clock; 19 | pub mod codec; 20 | pub mod logging; 21 | pub mod range_bounds; 22 | pub mod serialization; 23 | pub mod server; 24 | 25 | extern crate bytes; 26 | #[macro_use] 27 | extern crate log; 28 | #[macro_use] 29 | extern crate lazy_static; 30 | extern crate mio; 31 | extern crate protobuf; 32 | extern crate rand; 33 | extern crate rocksdb; 34 | extern crate time; 35 | extern crate uuid; 36 | extern crate threadpool; 37 | -------------------------------------------------------------------------------- /src/logging/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fs::{self, File, OpenOptions}; 2 | use std::io::{Error, ErrorKind}; 3 | use std::io::prelude::Write; 4 | use std::path::Path; 5 | use std::sync::{Arc, Mutex}; 6 | 7 | use log::{self, LogLevel, LogLevelFilter, LogMetadata, LogRecord, 8 | SetLoggerError}; 9 | use time; 10 | 11 | struct StdoutLogger { 12 | level: LogLevel, 13 | } 14 | 15 | impl log::Log for StdoutLogger { 16 | fn enabled(&self, metadata: &LogMetadata) -> bool { 17 | metadata.level() <= self.level 18 | } 19 | 20 | fn log(&self, record: &LogRecord) { 21 | if self.enabled(record.metadata()) { 22 | println!("{} {} 
{}:{}] {}", 23 | record.level(), 24 | time::now().to_timespec().sec, // TODO(tyler) logical clock 25 | record.location().file().split("/").last().unwrap(), 26 | record.location().line(), 27 | record.args()); 28 | } 29 | } 30 | } 31 | 32 | struct FileLogger { 33 | file: Arc>, 34 | level: LogLevel, 35 | } 36 | 37 | impl FileLogger { 38 | pub fn new(path: &str, level: LogLevel) -> Result { 39 | let ospath = Path::new(path).parent(); 40 | if ospath.is_none() { 41 | return Err(Error::new(ErrorKind::Other, 42 | format!("Failed to use log directory: {}", 43 | path))); 44 | } 45 | 46 | match fs::create_dir_all(&ospath.unwrap()) { 47 | Err(e) => return Err(Error::new(ErrorKind::Other, 48 | format!("Failed to create log \ 49 | directory: {}", 50 | e))), 51 | Ok(_) => (), 52 | } 53 | 54 | OpenOptions::new() 55 | .create(true) 56 | .write(true) 57 | .append(true) 58 | .open(path) 59 | .map(|file| { 60 | FileLogger { 61 | file: Arc::new(Mutex::new(file)), 62 | level: level, 63 | } 64 | }) 65 | } 66 | } 67 | 68 | impl log::Log for FileLogger { 69 | fn enabled(&self, metadata: &LogMetadata) -> bool { 70 | metadata.level() <= self.level 71 | } 72 | 73 | fn log(&self, record: &LogRecord) { 74 | if self.enabled(record.metadata()) { 75 | let mut logfile = self.file.clone(); 76 | logfile.lock() 77 | .unwrap() 78 | .write_all(format!("{} {} {}:{}] {}\n", 79 | record.level(), 80 | time::now().to_timespec().sec, 81 | record.location() 82 | .file() 83 | .split("/") 84 | .last() 85 | .unwrap(), 86 | record.location().line(), 87 | record.args()) 88 | .as_bytes()); 89 | } 90 | } 91 | } 92 | 93 | pub fn init_logger(path: Option, 94 | level: LogLevel) 95 | -> Result<(), SetLoggerError> { 96 | let logger: Box = match path { 97 | Some(p) => Box::new(FileLogger::new(p.trim_left(), level).unwrap()), 98 | None => Box::new(StdoutLogger { level: level }), 99 | }; 100 | 101 | log::set_logger(|max_log_level| { 102 | max_log_level.set(LogLevelFilter::Debug); 103 | logger 104 | }) 105 | } 106 | 
-------------------------------------------------------------------------------- /src/range_bounds.rs: --------------------------------------------------------------------------------
use std::cmp::Ordering;

/// A non-empty interval `[lower, upper)` over the byte keyspace.
///
/// Ordering treats *overlapping* ranges as `Equal`, which lets a
/// `BTreeMap` keyed by `RangeBounds` be probed with any range that falls
/// inside a stored one.
pub struct RangeBounds {
    lower: Vec<u8>,
    upper: Vec<u8>,
}

impl RangeBounds {
    /// Creates a range, rejecting empty/inverted bounds with `Err`.
    pub fn new(lower: Vec<u8>, upper: Vec<u8>) -> Result<RangeBounds, String> {
        if lower >= upper {
            Err("lower is >= upper, which is incorrect!".to_string())
        } else {
            Ok(RangeBounds {
                lower: lower,
                upper: upper,
            })
        }
    }
}

impl Ord for RangeBounds {
    // Overlap compares Equal (see type-level docs); disjoint ranges order
    // by position in the keyspace.
    fn cmp(&self, other: &Self) -> Ordering {
        assert!(self.upper > self.lower);
        assert!(other.upper > other.lower);
        if self.upper <= other.lower {
            Ordering::Less
        } else if self.lower >= other.upper {
            Ordering::Greater
        } else {
            Ordering::Equal
        }
    }
}

impl PartialOrd for RangeBounds {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl PartialEq for RangeBounds {
    // BUGFIX: previously compared (lower, upper) for exact equality, which
    // disagreed with `cmp` (overlapping ranges were Equal under Ord but not
    // under PartialEq), violating the Ord contract required by ordered
    // collections. Delegate to `cmp` so the two always agree.
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for RangeBounds { }
-------------------------------------------------------------------------------- /src/server/acked_log.rs: --------------------------------------------------------------------------------
pub use server::{PeerID, TXID, Term};
use std::fmt;

use std::collections::BTreeMap;

/// A replication log whose entries move from "accepted" (pending) to
/// "learned" (committed) once acknowledged by a quorum.
pub trait AckedLog<T> {
    fn append(&mut self, term: Term, txid: TXID, entry: T);
    fn get(&self, txid: TXID) -> Option<T>;
    fn ack_up_to(&mut self, txid: TXID, peer: PeerID) -> Vec<(Term, TXID)>;
    fn commit_up_to(&mut self, txid: TXID) -> Vec<(Term, TXID)>;
    fn last_learned_term(&self) -> Term;
    fn last_learned_txid(&self) -> TXID;
    fn last_accepted_term(&self) -> Term;
    fn last_accepted_txid(&self) -> TXID;
}

// This should
be used for testing and debugging only. 18 | pub trait ViewableLog { 19 | fn acked(&self) -> Vec<(Term, TXID)>; 20 | fn learned(&self) -> Vec<(Term, TXID)>; 21 | } 22 | 23 | impl fmt::Debug for AckedLog + Send { 24 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 25 | write!(f, 26 | "(lt: {} lx: {} at: {} ax: {})", 27 | self.last_learned_term(), 28 | self.last_learned_txid(), 29 | self.last_accepted_term(), 30 | self.last_accepted_txid()) 31 | } 32 | } 33 | 34 | #[derive(Debug)] 35 | pub struct LogEntry { 36 | txid: TXID, 37 | term: Term, 38 | last_txid: TXID, 39 | last_term: Term, 40 | entry: T, 41 | } 42 | 43 | #[derive(Debug)] 44 | pub struct Acked { 45 | acks: Vec, 46 | inner: T, 47 | } 48 | 49 | // Leaders and Followers have an AckedLog for handling replication. 50 | // Leaders have quorums of cluster_sz / 2 + 1, and Followers have 51 | // a quorum of 1 (need a single subsequent ack from leader) 52 | #[derive(Debug)] 53 | pub struct InMemoryLog { 54 | pub pending: BTreeMap>>, 55 | pub committed: BTreeMap>, 56 | pub quorum: usize, 57 | pub last_learned_txid: TXID, 58 | pub last_learned_term: Term, 59 | pub last_accepted_txid: TXID, 60 | pub last_accepted_term: Term, 61 | } 62 | 63 | unsafe impl Sync for InMemoryLog{} 64 | 65 | impl AckedLog for InMemoryLog { 66 | fn append(&mut self, term: Term, txid: TXID, entry: T) { 67 | //TODO verify txid > last accepted 68 | assert!(txid > self.last_accepted_txid); 69 | self.pending.insert(txid, 70 | Acked { 71 | acks: vec![], 72 | inner: LogEntry { 73 | txid: txid, 74 | term: term, 75 | last_txid: self.last_accepted_txid, 76 | last_term: self.last_accepted_term, 77 | entry: entry, 78 | }, 79 | }); 80 | self.last_accepted_txid = txid; 81 | self.last_accepted_term = term; 82 | } 83 | 84 | fn get(&self, txid: TXID) -> Option { 85 | self.pending 86 | .get(&txid) 87 | .map(|al| al.inner.entry.clone()) 88 | .or(self.committed.get(&txid).map(|l| l.entry.clone())) 89 | } 90 | 91 | // Used by leaders to know when they've 
gotten enough acks. 92 | // returns a set of txid's that have reached quorum 93 | fn ack_up_to(&mut self, txid: TXID, peer: PeerID) -> Vec<(Term, TXID)> { 94 | // append ack 95 | for (txid, ent) in self.pending.iter_mut() { 96 | if ent.inner.txid <= *txid { 97 | if !ent.acks.contains(&peer) { 98 | ent.acks.push(peer) 99 | } 100 | break 101 | } 102 | } 103 | let mut reached_quorum = vec![]; 104 | loop { 105 | if self.pending.len() == 0 { 106 | break; 107 | } 108 | let txid = self.pending.keys().cloned().next().unwrap(); 109 | if self.pending.get(&txid).unwrap().acks.len() < self.quorum { 110 | break; 111 | } 112 | // TODO(tyler) work out persistence story so we don't lose 113 | // logs during server crash between remove and push. 114 | let ent = self.pending.remove(&txid).unwrap(); 115 | self.last_learned_term = ent.inner.term; 116 | self.last_learned_txid = ent.inner.txid; 117 | reached_quorum.push((ent.inner.term, ent.inner.txid)); 118 | self.committed.insert(txid, ent.inner); 119 | } 120 | reached_quorum 121 | } 122 | 123 | // Used by followers to commit where the leader told them they should 124 | // be learning up to. 125 | // returns the set of txids that have reached quorum 126 | fn commit_up_to(&mut self, txid: TXID) -> Vec<(Term, TXID)> { 127 | let mut reached_quorum = vec![]; 128 | loop { 129 | if self.pending.len() == 0 { 130 | break; 131 | } 132 | let next_txid = self.pending.keys().cloned().next().unwrap(); 133 | if next_txid > txid { 134 | break; 135 | } 136 | let ent = self.pending.remove(&next_txid).unwrap(); 137 | 138 | // TODO(tyler) work out persistence story so we don't lose 139 | // logs during server crash between remove and push. 
140 | self.last_learned_term = ent.inner.term; 141 | self.last_learned_txid = ent.inner.txid; 142 | reached_quorum.push((ent.inner.term, ent.inner.txid)); 143 | self.committed.insert(txid, ent.inner); 144 | } 145 | reached_quorum 146 | } 147 | 148 | fn last_learned_term(&self) -> Term { 149 | self.last_learned_term 150 | } 151 | 152 | fn last_learned_txid(&self) -> TXID { 153 | self.last_learned_txid 154 | } 155 | 156 | fn last_accepted_term(&self) -> Term { 157 | self.last_accepted_term 158 | } 159 | 160 | fn last_accepted_txid(&self) -> TXID { 161 | self.last_accepted_txid 162 | } 163 | } 164 | 165 | impl ViewableLog for InMemoryLog>> { 166 | fn acked(&self) -> Vec<(Term, TXID)> { 167 | let mut ret = vec![]; 168 | for (txid, acked) in self.pending.iter() { 169 | ret.push((acked.inner.term, *txid)); 170 | } 171 | ret 172 | } 173 | 174 | fn learned(&self) -> Vec<(Term, TXID)> { 175 | let mut ret = vec![]; 176 | for (txid, learned) in self.committed.iter() { 177 | ret.push((learned.term, *txid)); 178 | } 179 | ret 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /src/server/connset.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind}; 2 | use std::io; 3 | use std::sync::mpsc::Sender; 4 | 5 | use mio; 6 | use mio::{EventLoop, EventSet, PollOpt, Token}; 7 | use mio::tcp::{TcpListener, TcpStream}; 8 | use mio::util::Slab; 9 | 10 | use server::Envelope; 11 | use server::server_conn::ServerConn; 12 | use server::traffic_cop::TrafficCop; 13 | 14 | pub struct ConnSet { 15 | pub srv_sock: TcpListener, 16 | pub srv_token: Token, 17 | pub conns: Slab, 18 | pub req_tx: Sender, 19 | } 20 | 21 | impl ConnSet { 22 | pub fn accept(&mut self, 23 | event_loop: &mut EventLoop) 24 | -> io::Result<()> { 25 | 26 | debug!("ConnSet accepting socket"); 27 | 28 | let sock = try!(self.srv_sock.accept()); 29 | self.register(sock.unwrap(), event_loop).map(|_| ()) 30 | } 31 
| 32 | pub fn register(&mut self, 33 | sock: TcpStream, 34 | event_loop: &mut EventLoop) 35 | -> io::Result { 36 | 37 | let conn = ServerConn::new(sock, self.req_tx.clone()); 38 | 39 | // Re-register accepting socket 40 | event_loop.reregister(&self.srv_sock, 41 | self.srv_token, 42 | EventSet::readable(), 43 | PollOpt::edge() | PollOpt::oneshot()); 44 | 45 | self.conns 46 | .insert(conn) 47 | .map(|tok| { 48 | // Register the connection 49 | self.conns[tok].token = Some(tok); 50 | event_loop.register_opt(&self.conns[tok].sock, 51 | tok, 52 | EventSet::readable(), 53 | PollOpt::edge() | PollOpt::oneshot()) 54 | .ok() 55 | .expect("could not register socket with event loop"); 56 | tok 57 | }) 58 | .or_else(|e| { 59 | Err(Error::new(ErrorKind::Other, "All connection slots full.")) 60 | }) 61 | } 62 | 63 | pub fn conn_readable(&mut self, 64 | event_loop: &mut EventLoop, 65 | tok: Token) 66 | -> io::Result<()> { 67 | 68 | debug!("ConnSet conn readable; tok={:?}", tok); 69 | if !self.conns.contains(tok) { 70 | debug!("got conn_readable for non-existent token!"); 71 | return Ok(()); 72 | } 73 | 74 | self.conn(tok).readable(event_loop) 75 | } 76 | 77 | pub fn conn_writable(&mut self, 78 | event_loop: &mut EventLoop, 79 | tok: Token) 80 | -> io::Result<()> { 81 | if !self.conns.contains(tok) { 82 | debug!("got conn_writable for non-existent token!"); 83 | return Ok(()); 84 | } 85 | 86 | debug!("ConnSet conn writable; tok={:?}", tok); 87 | match self.conn(tok).writable(event_loop) { 88 | Err(e) => { 89 | debug!("got err in ConnSet conn_writable: {}", e); 90 | Err(e) 91 | } 92 | w => w, 93 | } 94 | } 95 | 96 | fn conn<'a>(&'a mut self, tok: Token) -> &'a mut ServerConn { 97 | &mut self.conns[tok] 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/server/mod.rs: -------------------------------------------------------------------------------- 1 | mod server; 2 | mod connset; 3 | mod server_conn; 4 | mod traffic_cop; 5 | 
// src/server/mod.rs (continued): submodule declarations, public re-exports,
// and the shared vocabulary of the consensus implementation - reserved
// Tokens for the two listening sockets plus a broadcast sentinel
// (Token(usize::MAX)), the TXID/Term/PeerID aliases, the Envelope message
// wrapper, the SendChannel abstraction over mio and mpsc senders, and the
// Peer/RepPeer/State types used by server.rs.
mod acked_log; 6 | pub mod rocksdb; 7 | 8 | pub use server::server::Server; 9 | pub use server::connset::ConnSet; 10 | pub use server::server_conn::ServerConn; 11 | pub use server::acked_log::{AckedLog, InMemoryLog, LogEntry}; 12 | 13 | use std::io::{Error, ErrorKind}; 14 | use std::io; 15 | use std::net::SocketAddr; 16 | use std::ops::{Add, Sub}; 17 | use std::sync::{Arc, Mutex}; 18 | use std::sync::mpsc::{self, Receiver, SendError, Sender}; 19 | use std::thread; 20 | use std::usize; 21 | 22 | use bytes::{Buf, ByteBuf, MutByteBuf, SliceBuf, alloc}; 23 | use mio; 24 | use mio::{EventLoop, EventSet, Handler, NotifyError, PollOpt, Token, TryRead, 25 | TryWrite}; 26 | use mio::tcp::{TcpListener, TcpSocket, TcpStream}; 27 | use mio::util::Slab; 28 | use rand::{Rng, thread_rng}; 29 | use rocksdb::{DB, Writable}; 30 | use protobuf; 31 | use protobuf::Message; 32 | use time; 33 | 34 | pub const SERVER_CLIENTS: Token = Token(0); 35 | pub const SERVER_PEERS: Token = Token(1); 36 | pub const PEER_BROADCAST: Token = Token(usize::MAX); 37 | 38 | lazy_static!
// Leader lease constants: a lease lasts 12 seconds and refresh is
// attempted when within 6 seconds of expiry. Envelope carries one wire
// message plus its source/destination token; its manual Clone exists
// because ByteBuf is not Clone (the payload is re-sliced). State is the
// per-node consensus state machine: Leader/Candidate track collected vote
// tokens (`have`) against the quorum size (`need`) and a lease deadline
// (`until`); Follower remembers the leader's identity, token, address and
// lease; Init is the neutral boot state.
// NOTE(review): generic parameters were stripped by this dump (e.g.
// `pub address: Option,` - presumably Option<SocketAddr>; `have: Vec,` -
// presumably Vec<Token>); confirm against the repository.
{ 39 | pub static ref LEADER_DURATION: time::Duration = 40 | time::Duration::seconds(12); 41 | pub static ref LEADER_REFRESH: time::Duration = 42 | time::Duration::seconds(6); 43 | } 44 | 45 | pub type TXID = u64; 46 | pub type Term = u64; 47 | pub type PeerID = String; 48 | 49 | pub struct Envelope { 50 | pub address: Option, 51 | pub tok: Token, 52 | pub msg: ByteBuf, 53 | } 54 | 55 | impl Clone for Envelope { 56 | fn clone(&self) -> Self { 57 | Envelope { 58 | address: self.address, 59 | tok: self.tok, 60 | msg: ByteBuf::from_slice(self.msg.bytes()), 61 | } 62 | } 63 | } 64 | 65 | pub trait SendChannel { 66 | fn send_msg(&self, msg: M) -> E; 67 | } 68 | 69 | impl SendChannel>> for mio::Sender { 70 | fn send_msg(&self, msg: M) -> Result<(), NotifyError> { 71 | self.send(msg) 72 | } 73 | } 74 | 75 | impl SendChannel>> for Sender { 76 | fn send_msg(&self, msg: M) -> Result<(), SendError> { 77 | self.send(msg) 78 | } 79 | } 80 | 81 | #[derive(Debug, PartialEq, Clone)] 82 | pub struct Peer { 83 | addr: SocketAddr, 84 | sock: Option, 85 | } 86 | 87 | #[derive(Debug, PartialEq)] 88 | pub struct RepPeer { 89 | last_accepted_term: Term, 90 | last_accepted_txid: TXID, 91 | max_sent_txid: TXID, 92 | tok: Token, 93 | id: PeerID, 94 | addr: Option, 95 | } 96 | 97 | #[derive(Debug, Clone)] 98 | pub enum State { 99 | Leader { 100 | term: Term, 101 | have: Vec, 102 | need: u8, 103 | until: time::Timespec, 104 | }, 105 | Candidate { 106 | term: Term, 107 | have: Vec, 108 | need: u8, 109 | until: time::Timespec, 110 | }, 111 | Follower { 112 | term: Term, 113 | id: PeerID, 114 | tok: Token, 115 | leader_addr: SocketAddr, 116 | until: time::Timespec, 117 | }, 118 | Init, 119 | } 120 | 121 | impl State { 122 | fn valid_leader(&self, now: time::Timespec) -> bool { 123 | match *self { 124 | State::Leader{until: until, ..} => now < until, 125 | State::Follower{ 126 | term:_, id:_, leader_addr: _, until: until, tok: _ 127 | } => now < until, 128 | _ => false, 129 | } 130 | } 131 | 132
// impl State (continued): lease-aware predicates. valid_candidate/valid_leader
// compare `now` against the stored `until` deadline; is_* are shape checks;
// should_extend_leadership is true only inside the refresh window
// (within LEADER_REFRESH of expiry but before it); until()/term() project
// the common fields out of whichever variant is active.
// NOTE(review): `following` and `is_following` are duplicate logic (one
// compares `id == *fid`, the other `*lid == id`) and `following` binds an
// unused `until`; candidates for consolidation. Also `can_extend_lead`
// requires strictly MORE than `need` acks (`>`), while the ascension checks
// in server.rs use `>= need` - TODO confirm which quorum comparison is
// intended.
// Also inlined below: src/server/rocksdb.rs - opens the DB with column
// families "storage" and "local_meta", falling back to a plain DB::open
// plus create_cf on first run, and panics if initialization fails; then the
// import head of src/server/server.rs begins.
| fn valid_candidate(&self, now: time::Timespec) -> bool { 133 | match *self { 134 | State::Candidate{until: until, ..} => now < until, 135 | _ => false, 136 | } 137 | } 138 | 139 | pub fn is_leader(&self) -> bool { 140 | match *self { 141 | State::Leader{..} => true, 142 | _ => false, 143 | } 144 | } 145 | 146 | fn is_follower(&self) -> bool { 147 | match *self { 148 | State::Follower{..} => true, 149 | _ => false, 150 | } 151 | } 152 | 153 | fn is_following(&self, id: PeerID) -> bool { 154 | match *self { 155 | State::Follower{id: ref lid, .. } => *lid == id, 156 | _ => false, 157 | } 158 | } 159 | 160 | fn is_candidate(&self) -> bool { 161 | match *self { 162 | State::Candidate{..} => true, 163 | _ => false, 164 | } 165 | } 166 | 167 | fn should_extend_leadership(&self, now: time::Timespec) -> bool { 168 | match *self { 169 | State::Leader{until: until, ..} => { 170 | now.add(*LEADER_REFRESH) >= until && now < until 171 | } 172 | _ => false, 173 | } 174 | } 175 | 176 | fn can_extend_lead(&self) -> bool { 177 | match *self { 178 | State::Candidate{have: ref have, need: need, ..} => 179 | have.len() > need as usize, 180 | State::Leader{have: ref have, need: need, ..} => 181 | have.len() > need as usize, 182 | _ => false, 183 | } 184 | } 185 | 186 | fn following(&self, id: PeerID) -> bool { 187 | match *self { 188 | State::Follower{id: ref fid, until: until, .. } => id == *fid, 189 | _ => false, 190 | } 191 | } 192 | 193 | fn until(&self) -> Option { 194 | match *self { 195 | State::Leader{until: until, ..} => Some(until), 196 | State::Candidate{until: until, ..} => Some(until), 197 | State::Follower{ until: until, .. } => Some(until), 198 | _ => None, 199 | } 200 | } 201 | 202 | pub fn term(&self) -> Option { 203 | match *self { 204 | State::Leader{term: term, ..} => Some(term), 205 | State::Candidate{term: term, ..} => Some(term), 206 | State::Follower{term: term, ..
// (the dump continues mid-match; the Follower arm closes at the start of
// the next physical line)
} => Some(term), 207 | _ => None, 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/server/rocksdb.rs: -------------------------------------------------------------------------------- 1 | use rocksdb::{DB, Writable}; 2 | use rocksdb::Options as RocksDBOptions; 3 | 4 | pub fn new(storage_dir: String) -> DB { 5 | let mut opts = RocksDBOptions::new(); 6 | let memtable_budget = 1024; 7 | opts.optimize_level_style_compaction(memtable_budget); 8 | opts.create_if_missing(true); 9 | match DB::open_cf(&opts, &storage_dir, &["storage", "local_meta"]) { 10 | Ok(db) => db, 11 | Err(_) => { 12 | info!("Attempting to initialize data directory at {}", storage_dir); 13 | match DB::open(&opts, &storage_dir) { 14 | Ok(mut db) => { 15 | db.create_cf("storage", &RocksDBOptions::new()).unwrap(); 16 | db.create_cf("local_meta", &RocksDBOptions::new()).unwrap(); 17 | db 18 | } 19 | Err(e) => { 20 | error!("failed to create database at {}", storage_dir); 21 | error!("{}", e); 22 | panic!(e); 23 | } 24 | } 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/server/server.rs: -------------------------------------------------------------------------------- 1 | use std::cmp; 2 | use std::collections::BTreeMap; 3 | use std::net::SocketAddr; 4 | use std::ops::Add; 5 | use std::process; 6 | use std::sync::{Arc, Mutex}; 7 | use std::sync::mpsc; 8 | use std::thread; 9 | 10 | use bytes::{Buf, ByteBuf}; 11 | use mio; 12 | use mio::{EventLoop, Token}; 13 | use rand::{Rng, thread_rng}; 14 | use rocksdb::{DB, DBResult, Writable}; 15 | use protobuf; 16 | use protobuf::Message; 17 | use uuid::Uuid; 18 | 19 | use {Append, AppendRes, CliReq, CliRes, Clock, GetReq, GetRes, Mutation, 20 | MutationType, PeerMsg, RealClock, RedirectRes, SetReq, SetRes, Version, 21 | CASReq, CASRes, DelReq, DelRes, VoteReq, VoteRes}; 22 | use server::{Envelope, LEADER_DURATION, PEER_BROADCAST, State}; 23
// src/server/server.rs (continued): the Server aggregate and its entry
// point. Server owns the clock, node identity, peer/replication-peer
// tables, the notify-channel sender for outbound RPCs, txid/term
// watermarks, the consensus State, the rocksdb handle, the replication
// log, and the map of client requests awaiting a learned txid.
// run() wires everything together: open the DB, build the TrafficCop and
// a single mio EventLoop, then spawn four workers (IO loop, peer request
// handler, cli request handler, cron). Every worker holds a clone of
// thread_exit_tx; the final recv() returning means some worker died, and
// the process panics rather than limp along (see the ZK-bug comment).
// All request handling funnels through one global Mutex<Server>.
// NOTE(review): the JoinHandle Results from thread::Builder::spawn and the
// send(()) results on the exit channel are ignored throughout run().
| use server::{AckedLog, InMemoryLog, LogEntry, PeerID, RepPeer, TXID, Term}; 24 | use server::{SendChannel, rocksdb}; 25 | use server::traffic_cop::TrafficCop; 26 | 27 | pub struct Server { 28 | pub clock: Arc, 29 | pub peer_port: u16, 30 | pub cli_port: u16, 31 | pub id: PeerID, 32 | pub peers: Vec, 33 | pub rep_peers: BTreeMap, 34 | pub rpc_tx: Box + Send>, 35 | pub max_generated_txid: TXID, 36 | pub highest_term: Term, 37 | pub state: State, 38 | pub db: DB, 39 | pub rep_log: Box + Send>, 40 | pub pending: BTreeMap, 41 | } 42 | 43 | unsafe impl Sync for Server{} 44 | 45 | impl Server { 46 | 47 | pub fn run(peer_port: u16, 48 | cli_port: u16, 49 | storage_dir: String, 50 | peers: Vec) { 51 | let db = rocksdb::new(storage_dir); 52 | 53 | // All long-running worker threads get a clone of this 54 | // Sender. When they exit, they send over it. If the 55 | // Receiver ever completes a read, it means something 56 | // unexpectedly exited. It's vital that we shut down 57 | // immediately, so we don't repeat the ZK bug where 58 | // the heartbeater keeps running while other vital threads 59 | // have exited, falsely communicating healthiness. 60 | let (thread_exit_tx, thread_exit_rx) = mpsc::channel(); 61 | 62 | // The TrafficCop manages our sockets, sends deserialized 63 | // messages over the request channel, and receives completed 64 | // responses over the response channel. 65 | let (peer_req_tx, peer_req_rx) = mpsc::channel(); 66 | let (cli_req_tx, cli_req_rx) = mpsc::channel(); 67 | 68 | let mut tc = TrafficCop::new( 69 | peer_port, 70 | cli_port, 71 | peers.clone(), 72 | peer_req_tx, 73 | cli_req_tx 74 | ).unwrap(); 75 | 76 | // A single MIO EventLoop handles our IO 77 | let mut event_loop = EventLoop::new().unwrap(); 78 | 79 | // All RPC's are sent over the event_loop's 80 | // notification channel.
// (run continues: grab the notify-channel sender, seed the first cron
// timeout with a randomized 200-500ms delay, then spawn the workers.)
81 | let rpc_tx = event_loop.channel(); 82 | 83 | // start server periodic tasks 84 | event_loop.timeout_ms((), thread_rng().gen_range(200, 500)).unwrap(); 85 | 86 | // IO event loop thread 87 | let tex1 = thread_exit_tx.clone(); 88 | thread::Builder::new() 89 | .name("IO loop".to_string()) 90 | .spawn(move || { 91 | tc.run_event_loop(event_loop); 92 | tex1.send(()); 93 | }); 94 | 95 | let mut rep_log = Box::new(InMemoryLog { 96 | pending: BTreeMap::new(), 97 | committed: BTreeMap::new(), 98 | quorum: peers.len() / 2 + 1, 99 | last_learned_txid: 0, // TODO(tyler) read from rocksdb 100 | last_learned_term: 0, // TODO(tyler) read from rocksdb 101 | last_accepted_txid: 0, // TODO(tyler) read from rocksdb 102 | last_accepted_term: 0, // TODO(tyler) read from rocksdb 103 | }); 104 | 105 | let clock = Arc::new(RealClock); 106 | 107 | let server = Arc::new(Mutex::new(Server { 108 | clock: clock.clone(), 109 | peer_port: peer_port, 110 | cli_port: cli_port, 111 | id: Uuid::new_v4().to_string(), // TODO(tyler) read from rocksdb 112 | rpc_tx: Box::new(rpc_tx), 113 | max_generated_txid: 0, // TODO(tyler) read from rocksdb 114 | highest_term: 0, // TODO(tyler) read from rocksdb 115 | state: State::Init, 116 | db: db, 117 | rep_log: rep_log, 118 | peers: peers, 119 | rep_peers: BTreeMap::new(), 120 | pending: BTreeMap::new(), 121 | })); 122 | 123 | // peer request handler thread 124 | let srv1 = server.clone(); 125 | let tex2 = thread_exit_tx.clone(); 126 | thread::Builder::new() 127 | .name("peer request handler".to_string()) 128 | .spawn(move || { 129 | for req in peer_req_rx { 130 | match srv1.lock() { 131 | Ok(mut srv) => srv.handle_peer(req), 132 | Err(e) => { 133 | error!("{}", e); 134 | process::exit(1); 135 | } 136 | } 137 | } 138 | tex2.send(()); 139 | }); 140 | 141 | // cli request handler thread 142 | let srv2 = server.clone(); 143 | let tex3 = thread_exit_tx.clone(); 144 | thread::Builder::new() 145 | .name("cli request handler".to_string()) 146 | .spawn(move || {
// (run continues: cli handler loop, cron loop, then the fail-stop recv;
// afterwards update_rep_peers refreshes a peer's replication cursor while
// retaining the previous offsets, and handle_vote_res begins.)
147 | for req in cli_req_rx { 148 | match srv2.lock() { 149 | Ok(mut srv) => srv.handle_cli(req), 150 | Err(e) => { 151 | error!("{}", e); 152 | process::exit(1); 153 | } 154 | } 155 | } 156 | tex3.send(()); 157 | }); 158 | 159 | // cron thread 160 | let srv3 = server.clone(); 161 | let tex4 = thread_exit_tx.clone(); 162 | thread::Builder::new() 163 | .name("server cron".to_string()) 164 | .spawn(move || { 165 | let mut rng = thread_rng(); 166 | loop { 167 | clock.sleep_ms(rng.gen_range(400, 500)); 168 | match srv3.lock() { 169 | Ok(mut srv) => srv.cron(), 170 | Err(e) => { 171 | error!("{}", e); 172 | process::exit(1); 173 | } 174 | } 175 | } 176 | tex4.send(()); 177 | }); 178 | 179 | // this should never receive 180 | thread_exit_rx.recv(); 181 | let msg = "A worker thread unexpectedly exited! Shutting down."; 182 | error!("{}", msg); 183 | panic!("A worker thread unexpectedly exited! Shutting down."); 184 | } 185 | 186 | fn update_rep_peers(&mut self, 187 | peer_id: PeerID, 188 | addr: Option, 189 | tok: Token) { 190 | // don't send replication traffic to self 191 | if self.id == peer_id { 192 | return; 193 | } 194 | 195 | // set up a rep peer for this socket, and 196 | // reset possibly old ones 197 | match self.rep_peers 198 | .insert(peer_id.clone(), 199 | RepPeer { 200 | max_sent_txid: self.rep_log.last_accepted_txid(), 201 | last_accepted_txid: self.rep_log 202 | .last_accepted_txid(), 203 | last_accepted_term: self.rep_log 204 | .last_accepted_term(), 205 | tok: tok, 206 | id: peer_id.clone(), 207 | addr: addr, 208 | }) { 209 | Some(old_rep_peer) => { 210 | // retain previous offset information 211 | let new_rep_peer = self.rep_peers.get_mut(&peer_id).unwrap(); 212 | new_rep_peer.max_sent_txid = old_rep_peer.max_sent_txid; 213 | new_rep_peer.last_accepted_txid = old_rep_peer.last_accepted_txid; 214 | new_rep_peer.last_accepted_term = old_rep_peer.last_accepted_term; 215 | } 216 | _ => (), 217 | } 218 | } 219 | 220 | fn handle_vote_res(&mut self, 221 | env:
// handle_vote_res: process a VoteRes from a peer. Responses for a term
// other than our current one are ignored. As a candidate: any nack resets
// us to Init (and clears rep_peers) - deliberately softer than Raft, per
// the inline comment; an ack adds the peer's token to `have` and, once
// have.len() >= need, we ascend to Leader WITHOUT extending the lease
// deadline. As a valid leader: acks accumulate the same way, and reaching
// quorum extends `until` by LEADER_DURATION. All other combinations are
// logged (nack -> warn, anything else -> error dump of our state).
// NOTE(review): env.address.unwrap() in the opening debug! will panic on an
// Envelope with no address - presumably peer responses always carry one;
// confirm against traffic_cop.rs.
Envelope, 222 | peer_id: PeerID, 223 | vote_res: &VoteRes) { 224 | debug!("{} got response for vote request from {}", 225 | self.id, 226 | env.address.unwrap()); 227 | let term = self.state.term(); 228 | 229 | if term.is_none() || vote_res.get_term() != term.unwrap() { 230 | // got response for a term that is not valid 231 | debug!("invalid term, ignoring vote res"); 232 | return 233 | } 234 | 235 | // Reset if we get any nacks as a candidate. 236 | // This is a difference from Raft, where any node can dethrone 237 | // an otherwise healthy leader with a higher term. We will give 238 | // up on our own if we don't get a majority of unique votes 239 | // by the time our leader lease expires. This protects us against 240 | // a single partially partitioned node from livelocking our cluster. 241 | if self.state.valid_candidate(self.clock.now()) && 242 | !vote_res.get_success() { 243 | // TODO(tyler) set term in rocksdb 244 | if vote_res.get_term() > self.highest_term { 245 | self.highest_term = vote_res.get_term(); 246 | } 247 | self.state = State::Init; 248 | // reset replication peers 249 | self.rep_peers = BTreeMap::new(); 250 | } else if self.state.valid_candidate(self.clock.now()) { 251 | // we're currently a candidate, so see if we can ascend to 252 | // leader or if we need to give up 253 | self.state = match self.state.clone() { 254 | State::Candidate{ 255 | term: term, 256 | until: until, 257 | need: need, 258 | have: ref have, 259 | } => { 260 | let mut new_have = have.clone(); 261 | if !new_have.contains(&env.tok) && 262 | vote_res.get_term() == term { 263 | new_have.push(env.tok); 264 | self.update_rep_peers(peer_id, env.address, env.tok); 265 | } 266 | if new_have.len() >= need as usize { 267 | // we've ascended to leader!
268 | info!("{} transitioning to leader state", self.id); 269 | new_have = vec![]; 270 | let state = State::Leader { 271 | term: term, 272 | until: until, // don't extend until 273 | need: need, 274 | have: new_have, 275 | }; 276 | info!("{:?}", state); 277 | Some(state) 278 | } else { 279 | debug!("need more votes, have {} need {}", 280 | new_have.len(), 281 | need); 282 | // we still need more votes 283 | Some(State::Candidate { 284 | term: term, 285 | until: until, 286 | need: need, 287 | have: new_have, 288 | }) 289 | } 290 | } 291 | _ => None, 292 | } 293 | .unwrap(); 294 | } else if self.state.is_leader() && 295 | self.state.valid_leader(self.clock.now()) && 296 | vote_res.get_success() { 297 | 298 | self.state = match self.state.clone() { 299 | State::Leader{ 300 | term: term, 301 | until: until, 302 | need: need, 303 | have: ref have 304 | } => { 305 | let mut new_until = until; 306 | let mut new_have = have.clone(); 307 | if !new_have.contains(&env.tok) && 308 | vote_res.get_term() == term { 309 | new_have.push(env.tok); 310 | self.update_rep_peers(peer_id, env.address, env.tok); 311 | } 312 | if new_have.len() >= need as usize { 313 | debug!("{} leadership extended", self.id); 314 | new_have = vec![]; 315 | new_until = self.clock.now().add(*LEADER_DURATION); 316 | } 317 | Some(State::Leader { 318 | term: term, 319 | until: new_until, 320 | need: need, 321 | have: new_have, 322 | }) 323 | } 324 | _ => None, 325 | } 326 | .unwrap() 327 | } else if !vote_res.get_success() { 328 | warn!("{} received vote nack from {}", self.id, peer_id); 329 | } else { 330 | // this can happen if a vote res is received by a follower 331 | error!("got vote response, but we can't handle it"); 332 | error!("valid leader: {}", 333 | self.state.valid_leader(self.clock.now())); 334 | error!("is leader: {}", self.state.is_leader()); 335 | error!("valid candidate: {}", 336 | self.state.valid_candidate(self.clock.now())); 337 | error!("is candidate: {}", self.state.is_candidate());
// handle_vote_req (below): answer a peer's VoteReq. Order of checks:
// (1) our own broadcast echo -> ack without becoming a follower;
// (2) already following someone ELSE with a live lease -> nack with our
// term; (3) already following the requestor -> renew the followership
// lease and ack; (4) should_grant_vote passes -> become its Follower and
// ack; (5) otherwise nack, echoing our term when we have one. A reply is
// always sent via self.reply().
338 | error!("res term: {}", vote_res.get_term()); 339 | error!("our term: {}", self.state.term().unwrap()); 340 | } 341 | } 342 | 343 | fn handle_vote_req(&mut self, 344 | env: Envelope, 345 | peer_id: PeerID, 346 | vote_req: &VoteReq) { 347 | let mut res = PeerMsg::new(); 348 | res.set_srvid(self.id.clone()); 349 | let mut vote_res = VoteRes::new(); 350 | vote_res.set_term(vote_req.get_term()); 351 | 352 | if peer_id == self.id { 353 | // if we are this node (broadcast is naive) then all is well 354 | // reply to self but don't change to follower 355 | vote_res.set_success(true); 356 | } else if self.state.valid_leader(self.clock.now()) && 357 | !self.state.following(peer_id.clone()) { 358 | // if we're already following a different node, reject 359 | 360 | warn!("got unwanted vote req from {}", peer_id); 361 | // communicate to the source what our term is so they 362 | // can quickly get followers when we're dead. 363 | vote_res.set_term(self.state.term().unwrap()); 364 | vote_res.set_success(false); 365 | } else if self.state.following(peer_id.clone()) { 366 | // if we're already following this node, keep doing so 367 | debug!("{} extending followership of {}", self.id, peer_id); 368 | self.state = match self.state { 369 | State::Follower{ 370 | term: term, 371 | id: ref id, 372 | leader_addr: leader_addr, 373 | until: _, 374 | tok: tok, 375 | } => Some(State::Follower { 376 | term: term, 377 | id: id.clone(), 378 | leader_addr: leader_addr, 379 | until: self.clock.now().add(*LEADER_DURATION), 380 | tok: tok, 381 | }), 382 | _ => None, 383 | } 384 | .unwrap(); 385 | vote_res.set_success(true); 386 | } else if self.should_grant_vote(vote_req) { 387 | self.highest_term = vote_req.get_term(); 388 | info!("new leader {}", peer_id); 389 | self.state = State::Follower { 390 | id: peer_id.clone(), 391 | term: vote_req.get_term(), 392 | tok: env.tok, 393 | leader_addr: env.address.unwrap(), 394 | until: self.clock.now().add(*LEADER_DURATION), 395 | }; 396 |
// (tail of handle_vote_req, then the replication-stream handlers.)
// handle_append: follower side of replication. Leaders ignore Appends.
// We only accept a batch when we are following the sender AND the batch
// links exactly onto our last accepted (term, txid); each mutation must be
// term-monotonic (panic on decrease) and txid-monotonic (stale txids are
// skipped). After accepting we learn everything up to the leader's
// last_learned_txid. A non-linking batch is nacked with our current
// accepted watermarks so the leader knows where to backfill from.
info!("{:?}", self.state); 397 | vote_res.set_success(true); 398 | } else { 399 | match self.state.term() { 400 | Some(term) => vote_res.set_term(term), 401 | None => (), 402 | } 403 | 404 | vote_res.set_success(false); 405 | } 406 | res.set_vote_res(vote_res); 407 | self.reply(env, ByteBuf::from_slice(&*res.write_to_bytes().unwrap())); 408 | } 409 | 410 | fn handle_append(&mut self, 411 | env: Envelope, 412 | peer_id: PeerID, 413 | append: &Append) { 414 | if self.state.is_leader() { 415 | warn!("Leader got append request! This shouldn't happen."); 416 | return; 417 | } 418 | 419 | let mut res = PeerMsg::new(); 420 | res.set_srvid(self.id.clone()); 421 | let mut append_res = AppendRes::new(); 422 | 423 | // verify that we are following this node 424 | if self.state.is_following(peer_id.clone()) { 425 | // verify that it links 426 | if append.get_from_term() == self.rep_log.last_accepted_term() && 427 | append.get_from_txid() == self.rep_log.last_accepted_txid() { 428 | 429 | let mut max_term = self.rep_log.last_accepted_term(); 430 | let mut max_txid = self.rep_log.last_accepted_txid(); 431 | for mutation in append.get_batch() { 432 | let version = mutation.get_version(); 433 | if version.get_term() < max_term { 434 | error!("mutation term: {} our max: {}", 435 | version.get_term(), 436 | max_term); 437 | panic!("replication stream has decreasing term"); 438 | } 439 | if version.get_txid() <= max_txid { 440 | warn!("mutation txid: {} our max: {}", 441 | version.get_txid(), 442 | max_txid); 443 | continue; 444 | } 445 | max_term = version.get_term(); 446 | max_txid = version.get_txid(); 447 | debug!("accepting message txid {}", version.get_txid()); 448 | self.rep_log.append(version.get_term(), 449 | version.get_txid(), 450 | mutation.clone()); 451 | } 452 | 453 | append_res.set_accepted(true); 454 | append_res.set_last_accepted_term(max_term); 455 | append_res.set_last_accepted_txid(max_txid); 456 | 457 | // Bump up generator for future use if we transition to
leader. 458 | self.max_generated_txid = max_txid; 459 | 460 | for (term, txid) in 461 | self.rep_log.commit_up_to(append.get_last_learned_txid()) { 462 | 463 | debug!("follower learning term {} txid {}", term, txid); 464 | self.learn(term, txid); 465 | debug!("learned"); 466 | } 467 | } else { 468 | // this update doesn't link to our last entry, so tell the 469 | // leader where to replicate from. 470 | warn!("failed to link msg from: {}", append.get_from_txid()); 471 | warn!("{:?}", self.state); 472 | append_res.set_accepted(false); 473 | append_res.set_last_accepted_term(self.rep_log 474 | .last_accepted_term()); 475 | append_res.set_last_accepted_txid(self.rep_log 476 | .last_accepted_txid()); 477 | } 478 | } 479 | 480 | res.set_append_res(append_res); 481 | 482 | self.reply(env, ByteBuf::from_slice(&*res.write_to_bytes().unwrap())); 483 | } 484 | 485 | fn handle_append_res(&mut self, 486 | env: Envelope, 487 | peer_id: PeerID, 488 | append_res: &AppendRes) { 489 | // verify that we are leading 490 | if !self.state.is_leader() { 491 | return; 492 | } 493 | 494 | // update peer's info (which may be divergent!)
// handle_append_res (continued): leader side - refresh the peer's
// accepted watermarks, rewind max_sent_txid on a nack so the next
// replicate() backfills, then ack the log and learn anything that just
// reached quorum. handle_peer: dispatch a decoded PeerMsg to the matching
// handler (parse_from_bytes unwrap: peer traffic is assumed well-formed).
// handle_cli: leader-only client entry point; non-leaders answer with a
// RedirectRes instead.
495 | let mut accepted = vec![]; 496 | match self.rep_peers.get_mut(&peer_id) { 497 | Some(ref mut rep_peer) => { 498 | rep_peer.last_accepted_term = 499 | append_res.get_last_accepted_term(); 500 | rep_peer.last_accepted_txid = 501 | append_res.get_last_accepted_txid(); 502 | 503 | // reset max sent if we need to backfill 504 | if !append_res.get_accepted() { 505 | rep_peer.max_sent_txid = 506 | append_res.get_last_accepted_txid(); 507 | } 508 | 509 | // see if we can mark any updates as accepted 510 | accepted = self.rep_log 511 | .ack_up_to(append_res.get_last_accepted_txid(), 512 | peer_id); 513 | } 514 | None => error!("got AppendRes for non-existent peer!"), 515 | } 516 | for (term, txid) in accepted { 517 | debug!("leader learning txid {}", txid); 518 | self.learn(term, txid); 519 | } 520 | } 521 | 522 | pub fn handle_peer(&mut self, env: Envelope) { 523 | let peer_msg: PeerMsg = protobuf::parse_from_bytes(env.msg.bytes()) 524 | .unwrap(); 525 | let peer_id = peer_msg.get_srvid(); 526 | 527 | if peer_msg.has_vote_res() { 528 | self.handle_vote_res(env, 529 | peer_id.to_string(), 530 | peer_msg.get_vote_res()); 531 | } else if peer_msg.has_vote_req() { 532 | self.handle_vote_req(env, 533 | peer_id.to_string(), 534 | peer_msg.get_vote_req()); 535 | } else if peer_msg.has_append() { 536 | self.handle_append(env, peer_id.to_string(), peer_msg.get_append()); 537 | } else if peer_msg.has_append_res() { 538 | self.handle_append_res(env, 539 | peer_id.to_string(), 540 | peer_msg.get_append_res()); 541 | } else { 542 | error!("got unhandled peer message! {:?}", peer_msg); 543 | } 544 | } 545 | 546 | fn handle_cli(&mut self, req: Envelope) { 547 | let cli_req: CliReq = protobuf::parse_from_bytes(req.msg.bytes()) 548 | .unwrap(); 549 | let mut res = CliRes::new(); 550 | res.set_req_id(cli_req.get_req_id()); 551 | if !self.state.is_leader() { 552 | // If we aren't the leader, we must return some sort of 553 | // a RedirectRes instead of a response.
// handle_cli (body): non-leaders redirect (with the leader's address when
// we are a follower, or success=false when no leader is known). GET is
// served immediately from rocksdb with the last learned txid attached.
// SET/CAS/DEL each mint a fresh txid, build a versioned Mutation, park the
// client Envelope in self.pending keyed by txid, call replicate(), and
// return early - the response is sent from learn() once the txid commits.
// Only the GET/redirect paths fall through to the final reply().
554 | let mut redirect_res = RedirectRes::new(); 555 | // If we're a follower, a leader has been elected, so 556 | // set the return address. 557 | if self.state.is_follower() { 558 | let leader_address = match self.state { 559 | State::Follower{ 560 | term: _, 561 | id: _, 562 | leader_addr: leader_addr, 563 | until: _, 564 | tok: _, 565 | } => Some(leader_addr), 566 | _ => None, 567 | } 568 | .unwrap(); 569 | redirect_res.set_success(true); 570 | redirect_res.set_address(format!("{:?}", leader_address)); 571 | } else { 572 | redirect_res.set_success(false); 573 | redirect_res.set_err("No leader has been elected yet" 574 | .to_string()); 575 | } 576 | res.set_redirect(redirect_res); 577 | } else if cli_req.has_get() { 578 | let get_req = cli_req.get_get(); 579 | let mut get_res = GetRes::new(); 580 | self.db 581 | .get(get_req.get_key()) 582 | .map(|value| { 583 | get_res.set_success(true); 584 | get_res.set_value((*value).to_vec()); 585 | }) 586 | .on_absent(|| { 587 | get_res.set_success(false); 588 | get_res.set_err("Key not found".to_string()) 589 | }) 590 | .on_error(|e| { 591 | error!("Operational problem encountered: {}", e); 592 | get_res.set_success(false); 593 | get_res.set_err("Operational problem encountered" 594 | .to_string()); 595 | }); 596 | get_res.set_txid(self.rep_log.last_learned_txid()); 597 | res.set_get(get_res); 598 | } else if cli_req.has_set() { 599 | let txid = self.new_txid(); 600 | let set_req = cli_req.get_set(); 601 | 602 | // replicate the mutation 603 | let mut version = Version::new(); 604 | version.set_txid(txid); 605 | version.set_term(self.state.term().unwrap()); 606 | 607 | let mut mutation = Mutation::new(); 608 | mutation.set_field_type(MutationType::KVSET); 609 | mutation.set_version(version); 610 | mutation.set_key(set_req.get_key().to_vec()); 611 | mutation.set_value(set_req.get_value().to_vec()); 612 | 613 | info!("adding pending entry for txid {}", txid); 614 | self.pending.insert(txid, (req, cli_req.get_req_id()));
615 | self.replicate(vec![mutation]); 616 | // send a response later after this txid is learned 617 | return; 618 | } else if cli_req.has_cas() { 619 | let txid = self.new_txid(); 620 | let cas_req = cli_req.get_cas(); 621 | 622 | // replicate the mutation 623 | let mut version = Version::new(); 624 | version.set_txid(txid); 625 | version.set_term(self.state.term().unwrap()); 626 | 627 | let mut mutation = Mutation::new(); 628 | mutation.set_field_type(MutationType::KVCAS); 629 | mutation.set_version(version); 630 | mutation.set_key(cas_req.get_key().to_vec()); 631 | mutation.set_value(cas_req.get_new_value().to_vec()); 632 | mutation.set_old_value(cas_req.get_old_value().to_vec()); 633 | 634 | self.pending.insert(txid, (req, cli_req.get_req_id())); 635 | self.replicate(vec![mutation]); 636 | // send a response later after this txid is learned 637 | return; 638 | } else if cli_req.has_del() { 639 | let txid = self.new_txid(); 640 | let del_req = cli_req.get_del(); 641 | 642 | // replicate the mutation 643 | let mut version = Version::new(); 644 | version.set_txid(txid); 645 | version.set_term(self.state.term().unwrap()); 646 | 647 | let mut mutation = Mutation::new(); 648 | mutation.set_field_type(MutationType::KVDEL); 649 | mutation.set_version(version); 650 | mutation.set_key(del_req.get_key().to_vec()); 651 | 652 | self.pending.insert(txid, (req, cli_req.get_req_id())); 653 | self.replicate(vec![mutation]); 654 | // send a response later after this txid is learned 655 | return; 656 | } 657 | 658 | self.reply(req, ByteBuf::from_slice(&*res.write_to_bytes().unwrap())); 659 | } 660 | 661 | pub fn cron(&mut self) { 662 | debug!("{} state: {:?}", self.id, self.state); 663 | debug!("{} log: {:?}", self.id, self.rep_log); 664 | // become candidate if we need to 665 | if !self.state.valid_leader(self.clock.now()) && 666 | !self.state.valid_candidate(self.clock.now()) { 667 | info!("{} transitioning to candidate state", self.id); 668 | self.highest_term += 1; 669 |
// cron (body): when neither our leadership nor our candidacy is still
// valid, bump the term and become a Candidate with quorum need
// peers/2 + 1; then, whether newly candidate or a leader inside its
// refresh window, broadcast a VoteReq carrying our accepted/learned
// watermarks. The heartbeat block stays commented out pending a decision.
// new_txid: monotonically bump and return max_generated_txid.
// reply / peer_broadcast: push an Envelope onto the event loop's notify
// channel, addressed back to the requestor or to PEER_BROADCAST.
// replicate: leader-side fan-out - append each mutation to the local log,
// self-ack it (only reaches quorum on single-replica collections), then
// per rep peer build an Append linking from that peer's accepted
// watermarks with a window of at most 99 entries past max_sent_txid.
self.state = State::Candidate { 670 | term: self.highest_term, 671 | until: self.clock.now().add(*LEADER_DURATION), 672 | need: (self.peers.len() / 2 + 1) as u8, 673 | have: vec![], 674 | }; 675 | info!("{:?}", self.state); 676 | } 677 | 678 | // request or extend leadership 679 | if self.state.should_extend_leadership(self.clock.now()) || 680 | self.state.valid_candidate(self.clock.now()) { 681 | 682 | debug!("broadcasting VoteReq"); 683 | let mut req = PeerMsg::new(); 684 | req.set_srvid(self.id.clone()); 685 | let mut vote_req = VoteReq::new(); 686 | vote_req.set_term(self.state.term().unwrap()); 687 | vote_req.set_last_accepted_term(self.rep_log.last_accepted_term()); 688 | vote_req.set_last_accepted_txid(self.rep_log.last_accepted_txid()); 689 | vote_req.set_last_learned_term(self.rep_log.last_learned_term()); 690 | vote_req.set_last_learned_txid(self.rep_log.last_learned_txid()); 691 | req.set_vote_req(vote_req); 692 | self.peer_broadcast(ByteBuf::from_slice(&*req.write_to_bytes() 693 | .unwrap())); 694 | } 695 | 696 | // TODO(tyler) decide on whether to use heartbeats 697 | /* 698 | // heartbeat 699 | if self.state.is_leader() { 700 | let mut version = Version::new(); 701 | version.set_txid(self.new_txid()); 702 | version.set_term(self.state.term().unwrap()); 703 | 704 | let mut mutation = Mutation::new(); 705 | mutation.set_field_type(MutationType::KVSET); 706 | mutation.set_version(version); 707 | mutation.set_key(b"heartbeat".to_vec()); 708 | mutation.set_value(format!("{}", self.clock.now().sec) 709 | .as_bytes() 710 | .to_vec()); 711 | 712 | 713 | self.replicate(vec![mutation]); 714 | } 715 | */ 716 | } 717 | 718 | fn new_txid(&mut self) -> TXID { 719 | self.max_generated_txid += 1; 720 | info!("generating txid {}, {:?}", 721 | self.max_generated_txid, 722 | self.rep_log); 723 | self.max_generated_txid 724 | } 725 | 726 | fn reply(&mut self, req: Envelope, res_buf: ByteBuf) { 727 | self.rpc_tx.send_msg(Envelope { 728 | address: req.address, 729 | tok:
req.tok, 730 | msg: res_buf, 731 | }); 732 | } 733 | 734 | fn peer_broadcast(&mut self, msg: ByteBuf) { 735 | self.rpc_tx.send_msg(Envelope { 736 | address: None, 737 | tok: PEER_BROADCAST, 738 | msg: msg, 739 | }); 740 | } 741 | 742 | fn replicate(&mut self, mutations: Vec) { 743 | if mutations.len() > 0 { 744 | for mutation in mutations { 745 | let txid = mutation.get_version().get_txid(); 746 | self.rep_log.append(mutation.get_version().get_term(), 747 | txid, 748 | mutation); 749 | 750 | // this should only be learned on single replica collections 751 | let accepted = self.rep_log.ack_up_to(txid, self.id.clone()); 752 | for (term, txid) in accepted { 753 | debug!("leader learning txid {}", txid); 754 | self.learn(term, txid); 755 | } 756 | } 757 | 758 | debug!("in replicate, we have {} rep_peers", self.rep_peers.len()); 759 | 760 | // for each peer, send them their next message 761 | for (_, peer) in self.rep_peers.iter_mut() { 762 | let mut append = Append::new(); 763 | append.set_from_txid(peer.last_accepted_txid); 764 | append.set_from_term(peer.last_accepted_term); 765 | append.set_last_learned_txid(self.rep_log.last_learned_txid()); 766 | let mut batch = vec![]; 767 | for txid in peer.max_sent_txid + 1..peer.max_sent_txid + 100 { 768 | 769 | match self.rep_log.get(txid) { 770 | Some(mutation) => { 771 | // TODO(tyler) can we avoid copies here? 772 | // maybe if multiple Buf implementors could 773 | // hold RC>?
// learn (below): called when (term, txid) has reached quorum - fetch the
// mutation from the rep log (silently skip if we don't hold it yet) and
// apply it to rocksdb, building the matching SetRes/CASRes/DelRes.
774 | batch.push(mutation.clone()); 775 | peer.max_sent_txid = mutation.get_version() 776 | .get_txid(); 777 | } 778 | None => (), 779 | } 780 | } 781 | 782 | append.set_batch(protobuf::RepeatedField::from_vec(batch)); 783 | 784 | let mut peer_msg = PeerMsg::new(); 785 | peer_msg.set_srvid(self.id.clone()); 786 | peer_msg.set_append(append); 787 | 788 | self.rpc_tx.send_msg(Envelope { 789 | address: peer.addr, 790 | tok: peer.tok, 791 | msg: ByteBuf::from_slice(&*peer_msg.write_to_bytes() 792 | .unwrap()), 793 | }); 794 | } 795 | } 796 | 797 | let peer_ids: Vec = self.rep_peers.keys().cloned().collect(); 798 | debug!("accepted: {} learned: {}", 799 | self.rep_log.last_accepted_txid(), 800 | self.rep_log.last_learned_txid()); 801 | debug!("rep log unaccepted len: {:?}", 802 | self.rep_log.last_accepted_txid() - 803 | self.rep_log.last_learned_txid()); 804 | debug!("peers: {:?}", peer_ids); 805 | } 806 | 807 | fn learn(&mut self, term: Term, txid: TXID) { 808 | 809 | debug!("trying to get txid {} in rep log", txid); 810 | let mutation = match self.rep_log.get(txid) { 811 | Some(m) => m, 812 | None => { 813 | debug!("we don't have this tx in our log yet"); 814 | return 815 | } 816 | }; 817 | debug!("got txid {} from rep log", txid); 818 | 819 | let mut res = CliRes::new(); 820 | 821 | info!("matching field type {:?}", mutation.get_field_type()); 822 | match mutation.get_field_type() { 823 | MutationType::KVSET => { 824 | info!("processing set!"); 825 | let mut set_res = SetRes::new(); 826 | match self.db.put(mutation.get_key(), mutation.get_value()) { 827 | Ok(_) => set_res.set_success(true), 828 | Err(e) => { 829 | error!("Operational problem encountered: {}", e); 830 | set_res.set_success(false); 831 | set_res.set_err("Operational problem encountered".to_string()); 832 | } 833 | } 834 | res.set_set(set_res); 835 | }, 836 | MutationType::KVCAS => { 837 | let mut cas_res = CASRes::new(); 838 | match self.db.get(mutation.get_key()) { 839 | DBResult::Some(old_val) => {
840 | if mutation.has_old_value() && 841 | *old_val == *mutation.get_old_value() { 842 | 843 | // compare succeeded, let's try to set 844 | match self.db.put(mutation.get_key(), mutation.get_value()) { 845 | Ok(_) => { 846 | cas_res.set_success(true); 847 | cas_res.set_value(mutation.get_value().to_vec()); 848 | }, 849 | Err(e) => { 850 | error!("Operational problem encountered: {}", e); 851 | cas_res.set_success(false); 852 | cas_res.set_err("Operational problem encountered".to_string()); 853 | cas_res.set_value(old_val.to_vec()); 854 | } 855 | } 856 | } else { 857 | cas_res.set_success(false); 858 | cas_res.set_err("compare failure".to_string()); 859 | cas_res.set_value(old_val.to_vec()); 860 | } 861 | }, 862 | DBResult::None => { 863 | if !mutation.has_old_value() { 864 | match self.db.put(mutation.get_key(), mutation.get_value()) { 865 | Ok(_) => { 866 | cas_res.set_success(true); 867 | cas_res.set_value(mutation.get_value().to_vec()); 868 | }, 869 | Err(e) => { 870 | error!("Operational problem encountered: {}", e); 871 | cas_res.set_success(false); 872 | cas_res.set_err(format!("Operational problem encountered: {}", e)); 873 | } 874 | } 875 | } else { 876 | cas_res.set_success(false); 877 | cas_res.set_err("compare failure".to_string()); 878 | } 879 | }, 880 | DBResult::Error(e) => { 881 | cas_res.set_success(false); 882 | error!("Operational problem encountered: {}", e); 883 | cas_res.set_err(format!("Operational problem encountered: {}", e)); 884 | }, 885 | } 886 | cas_res.set_txid(self.rep_log.last_learned_txid()); 887 | res.set_cas(cas_res); 888 | }, 889 | MutationType::KVDEL => { 890 | let mut del_res = DelRes::new(); 891 | // If the value exists, return it. 
892 | match self.db.get(mutation.get_key()) { 893 | DBResult::Some(old_val) => { 894 | del_res.set_value(old_val.to_vec()); 895 | } 896 | DBResult::None => (), // we don't care 897 | DBResult::Error(e) => (), // we don't care, but we probably should 898 | } 899 | match self.db.delete(mutation.get_key()) { 900 | Ok(_) => del_res.set_success(true), 901 | Err(e) => { 902 | error!("Operational problem encountered: {}", e); 903 | del_res.set_success(false); 904 | del_res.set_err(format!("Operational problem encountered: {}", e)); 905 | } 906 | } 907 | res.set_del(del_res); 908 | }, 909 | } 910 | 911 | // TODO(tyler) use persisted crash-proof logic 912 | let pending = self.pending.remove(&txid); 913 | match pending { 914 | Some((env, req_id)) => { 915 | info!("found pending listener"); 916 | // If there's a pending client request associated with this, 917 | // then send them a response. 918 | res.set_req_id(req_id); 919 | self.reply(env, 920 | ByteBuf::from_slice(&*res.write_to_bytes() 921 | .unwrap())); 922 | } 923 | None => { 924 | info!("could not find pending for this learned request"); 925 | }, 926 | } 927 | } 928 | 929 | // These conditions guarantee that we don't lose acked writes 930 | // as long as a majority of our previous nodes stay alive. 931 | fn should_grant_vote(&self, vote_req: &VoteReq) -> bool { 932 | if self.state.valid_leader(self.clock.now()) { 933 | // we already have (or are) a valid leader 934 | false 935 | } else if vote_req.get_term() < self.rep_log.last_learned_term() { 936 | // This refers to a stale term. Note that we can still vote for 937 | // vote requestors with lower terms than we've accepted but not 938 | // learned, because our acks may not have actually gained quorum. 939 | // This is safe because any vote requestors that receives a quorum 940 | // of votes will have anything that reached quorum in past rounds 941 | // with the same members. 
942 | false 943 | } else { 944 | // at this point, we need to verify one of two conditions: 945 | // 1. that the vote requestor has learned anything in a higher 946 | // term than we have 947 | // 2. that the last term the vote requestor has learned something 948 | // is the same as ours, and the requestor has accepted at least 949 | // as many mutations within that term as we have 950 | if vote_req.get_last_learned_term() > 951 | self.rep_log.last_learned_term() { 952 | // case 1 953 | true 954 | } else if vote_req.get_last_learned_term() == 955 | self.rep_log.last_learned_term() && 956 | vote_req.get_last_accepted_txid() >= 957 | self.rep_log.last_accepted_txid() { 958 | // case 2 959 | true 960 | } else { 961 | // at this point, we know that we have a log that is more 962 | // recent than the vote requestor. 963 | false 964 | } 965 | } 966 | } 967 | } 968 | -------------------------------------------------------------------------------- /src/server/server_conn.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::sync::mpsc::Sender; 3 | 4 | use bytes::{Buf, ByteBuf}; 5 | use mio::{EventLoop, EventSet, PollOpt, Token, TryRead, TryWrite}; 6 | use mio::tcp::TcpStream; 7 | 8 | use codec::{self, Codec}; 9 | use server::Envelope; 10 | use server::traffic_cop::TrafficCop; 11 | 12 | pub struct ServerConn { 13 | pub sock: TcpStream, 14 | pub req_tx: Sender, 15 | pub res_bufs: Vec, // TODO(tyler) use proper dequeue 16 | pub res_remaining: usize, 17 | pub req_codec: codec::Framed, 18 | pub token: Option, 19 | pub interest: EventSet, 20 | } 21 | 22 | impl ServerConn { 23 | pub fn new(sock: TcpStream, req_tx: Sender) -> ServerConn { 24 | ServerConn { 25 | sock: sock, 26 | req_tx: req_tx, 27 | req_codec: codec::Framed::new(), 28 | res_bufs: vec![], 29 | res_remaining: 0, 30 | token: None, 31 | interest: EventSet::hup(), 32 | } 33 | } 34 | 35 | pub fn writable(&mut self, 36 | event_loop: &mut EventLoop) 37 | -> 
io::Result<()> { 38 | if self.res_bufs.len() == 0 { 39 | // no responses yet, don't reregister 40 | return Ok(()) 41 | } 42 | let mut res_buf = self.res_bufs.remove(0); 43 | 44 | debug!("res buf: {:?}", res_buf.bytes()); 45 | match self.sock.try_write_buf(&mut res_buf) { 46 | Ok(None) => { 47 | info!("client flushing buf; WOULDBLOCK"); 48 | self.interest.insert(EventSet::writable()); 49 | } 50 | Ok(Some(r)) => { 51 | debug!("CONN : we wrote {} bytes!", r); 52 | self.res_remaining -= r; 53 | debug!("remaining: {}", self.res_remaining); 54 | if self.res_remaining == 0 { 55 | // we've written the whole response, now let's wait to read 56 | self.interest.insert(EventSet::readable()); 57 | self.interest.remove(EventSet::writable()); 58 | } 59 | } 60 | Err(e) => { 61 | match e.raw_os_error() { 62 | Some(32) => { 63 | info!("client disconnected"); 64 | } 65 | Some(e) => info!("not implemented; client os err={:?}", e), 66 | _ => info!("not implemented; client err={:?}", e), 67 | }; 68 | // Don't reregister. 
69 | return Err(e); 70 | } 71 | } 72 | 73 | // push res back if it's not finished 74 | if res_buf.remaining() != 0 { 75 | self.res_bufs.insert(0, res_buf); 76 | } 77 | 78 | event_loop.reregister(&self.sock, 79 | self.token.unwrap(), 80 | self.interest, 81 | PollOpt::edge() | PollOpt::oneshot()) 82 | } 83 | 84 | pub fn readable(&mut self, 85 | event_loop: &mut EventLoop) 86 | -> io::Result<()> { 87 | 88 | // TODO(tyler) get rid of this double copying and read 89 | // directly to codec 90 | let mut req_buf = ByteBuf::mut_with_capacity(1024); 91 | 92 | match self.sock.try_read_buf(&mut req_buf) { 93 | Ok(None) => { 94 | panic!("got readable, but can't read from the socket"); 95 | } 96 | Ok(Some(r)) => { 97 | debug!("CONN : we read {} bytes!", r); 98 | //T self.interest.remove(EventSet::readable()); 99 | } 100 | Err(e) => { 101 | info!("not implemented; client err={:?}", e); 102 | self.interest.remove(EventSet::readable()); 103 | } 104 | }; 105 | 106 | for req in self.req_codec.decode(&mut req_buf.flip()) { 107 | self.req_tx.send(Envelope { 108 | address: Some(self.sock.peer_addr().unwrap()), 109 | tok: self.token.unwrap(), 110 | msg: req, 111 | }); 112 | } 113 | 114 | event_loop.reregister(&self.sock, 115 | self.token.unwrap(), 116 | self.interest, 117 | PollOpt::edge() | PollOpt::oneshot()) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/server/traffic_cop.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Error, ErrorKind}; 2 | use std::io; 3 | use std::sync::mpsc::Sender; 4 | 5 | use bytes::{Buf, ByteBuf, alloc}; 6 | use mio::{EventLoop, EventSet, Handler, PollOpt, Token, TryRead, TryWrite}; 7 | use mio::tcp::{TcpListener, TcpSocket}; 8 | use mio::util::Slab; 9 | use rand::{Rng, thread_rng}; 10 | 11 | use server::*; 12 | use codec; 13 | 14 | pub struct TrafficCop { 15 | peers: Vec, 16 | cli_handler: ConnSet, 17 | peer_handler: ConnSet, 18 | } 19 | 20 | 
impl TrafficCop { 21 | 22 | pub fn new(peer_port: u16, 23 | cli_port: u16, 24 | peer_addrs: Vec, 25 | peer_req_tx: Sender, 26 | cli_req_tx: Sender) 27 | -> io::Result { 28 | 29 | let cli_addr = format!("0.0.0.0:{}", cli_port).parse().unwrap(); 30 | info!("binding to {} for client connections", cli_addr); 31 | let cli_srv_sock = try!(TcpListener::bind(&cli_addr)); 32 | 33 | let peer_addr = format!("0.0.0.0:{}", peer_port).parse().unwrap(); 34 | info!("binding to {} for peer connections", peer_addr); 35 | let peer_srv_sock = try!(TcpListener::bind(&peer_addr)); 36 | 37 | let mut peers = vec![]; 38 | for peer in peer_addrs { 39 | peers.push(Peer { 40 | addr: peer.parse().unwrap(), 41 | sock: None, 42 | }); 43 | } 44 | 45 | Ok(TrafficCop { 46 | peers: peers, 47 | cli_handler: ConnSet { 48 | srv_sock: cli_srv_sock, 49 | srv_token: SERVER_CLIENTS, 50 | conns: Slab::new_starting_at(Token(1024), 4096), 51 | req_tx: cli_req_tx, 52 | }, 53 | peer_handler: ConnSet { 54 | srv_sock: peer_srv_sock, 55 | srv_token: SERVER_PEERS, 56 | conns: Slab::new_starting_at(Token(2), 15), 57 | req_tx: peer_req_tx, 58 | }, 59 | }) 60 | } 61 | 62 | pub fn run_event_loop(&mut self, 63 | mut event_loop: EventLoop) 64 | -> io::Result<()> { 65 | 66 | event_loop.register_opt(&self.cli_handler.srv_sock, 67 | SERVER_CLIENTS, 68 | EventSet::readable(), 69 | PollOpt::edge() | PollOpt::oneshot()) 70 | .unwrap(); 71 | 72 | event_loop.register_opt(&self.peer_handler.srv_sock, 73 | SERVER_PEERS, 74 | EventSet::readable(), 75 | PollOpt::edge() | PollOpt::oneshot()) 76 | .unwrap(); 77 | 78 | event_loop.run(self).unwrap(); 79 | 80 | Err(Error::new(ErrorKind::Other, "event_loop shouldn't have returned.")) 81 | } 82 | 83 | fn tok_to_sc(&mut self, tok: Token) -> Option<&mut ServerConn> { 84 | if tok.as_usize() > 1 && tok.as_usize() <= 128 { 85 | self.peer_handler.conns.get_mut(tok) 86 | } else if tok.as_usize() > 128 && tok.as_usize() <= 4096 { 87 | self.cli_handler.conns.get_mut(tok) 88 | } else { 89 | 
error!("bad event loop notification message envelope"); 90 | None 91 | } 92 | } 93 | } 94 | 95 | impl Handler for TrafficCop { 96 | type Timeout = (); 97 | type Message = Envelope; 98 | 99 | fn ready(&mut self, 100 | event_loop: &mut EventLoop, 101 | token: Token, 102 | events: EventSet) { 103 | if events.is_hup() || events.is_error() { 104 | debug!("clearing error or hup connection"); 105 | match token { 106 | peer if peer.as_usize() >= 2 && peer.as_usize() <= 16 => { 107 | if self.peer_handler.conns.contains(token) { 108 | self.peer_handler.conns.remove(token); 109 | for peer in self.peers.iter_mut() { 110 | if peer.sock == Some(token) { 111 | debug!("dropping disconnected peer socket"); 112 | peer.sock = None; 113 | } 114 | } 115 | } 116 | } 117 | cli if cli.as_usize() >= 1024 && cli.as_usize() <= 4096 => { 118 | if self.cli_handler.conns.contains(token) { 119 | self.cli_handler.conns.remove(token); 120 | } 121 | } 122 | t => panic!("bad token for error/hup: {}", t.as_usize()), 123 | } 124 | } 125 | 126 | if events.is_readable() { 127 | match token { 128 | SERVER_PEERS => { 129 | debug!("got SERVER_PEERS accept"); 130 | self.peer_handler.accept(event_loop).or_else(|e| { 131 | error!("failed to accept peer: all slots full"); 132 | Err(e) 133 | }); 134 | } 135 | SERVER_CLIENTS => { 136 | debug!("got SERVER_CLIENTS accept"); 137 | self.cli_handler.accept(event_loop).or_else(|e| { 138 | error!("failed to accept client: all slots full"); 139 | Err(e) 140 | }); 141 | } 142 | peer if peer.as_usize() >= 2 && peer.as_usize() <= 16 => { 143 | self.peer_handler.conn_readable(event_loop, peer).unwrap(); 144 | } 145 | cli if cli.as_usize() >= 1024 && cli.as_usize() <= 4096 => { 146 | self.cli_handler.conn_readable(event_loop, cli).unwrap(); 147 | } 148 | t => panic!("unknown token: {}", t.as_usize()), 149 | } 150 | } 151 | 152 | if events.is_writable() { 153 | match token { 154 | SERVER_PEERS => panic!("received writable for SERVER_PEERS"), 155 | SERVER_CLIENTS => 156 | 
panic!("received writable for token SERVER_CLIENTS"), 157 | peer if peer.as_usize() > 1 && peer.as_usize() <= 128 => 158 | self.peer_handler.conn_writable(event_loop, peer), 159 | cli if cli.as_usize() > 128 && cli.as_usize() <= 4096 => 160 | self.cli_handler.conn_writable(event_loop, cli), 161 | t => panic!("received writable for out-of-range token: {}", 162 | t.as_usize()), 163 | }; 164 | } 165 | } 166 | 167 | // timeout is triggered periodically to (re)establish connections to peers. 168 | fn timeout(&mut self, 169 | event_loop: &mut EventLoop, 170 | timeout: ()) { 171 | for peer in self.peers.iter_mut() { 172 | if peer.sock.is_none() { 173 | debug!("reestablishing connection with peer"); 174 | let (sock, _) = TcpSocket::v4() 175 | .unwrap() 176 | .connect(&peer.addr) 177 | .unwrap(); 178 | self.peer_handler.register(sock, event_loop).map(|tok| { 179 | peer.sock = Some(tok); 180 | }); 181 | } 182 | } 183 | debug!("have {:?} peer connections", 184 | self.peer_handler.conns.count()); 185 | // if leader is None, try to get promise leases, following-up with 186 | // an abdication if we fail to get quorum after 2s (randomly picked). 
187 | 188 | // if leader is self, renew after 6s 189 | 190 | let mut rng = thread_rng(); 191 | event_loop.timeout_ms((), rng.gen_range(200, 500)).unwrap(); 192 | } 193 | 194 | // notify is used to transmit messages 195 | fn notify(&mut self, 196 | event_loop: &mut EventLoop, 197 | mut msg: Envelope) { 198 | let mut toks = vec![]; 199 | if msg.tok == PEER_BROADCAST { 200 | for peer in self.peers.iter() { 201 | peer.sock.map(|tok| toks.push(tok)); 202 | } 203 | } else { 204 | toks.push(msg.tok); 205 | } 206 | for tok in toks { 207 | let sco = self.tok_to_sc(tok); 208 | if sco.is_none() { 209 | warn!("got notify for invalid token {}", tok.as_usize()); 210 | continue; 211 | } 212 | let mut sc = sco.unwrap(); 213 | let m = msg.msg.bytes(); 214 | 215 | let size = 4 + m.len(); 216 | let mut res = unsafe { 217 | ByteBuf::from_mem_ref(alloc::heap(size.next_power_of_two()), 218 | size as u32, // cap 219 | 0, // pos 220 | size as u32 /* lim */) 221 | .flip() 222 | }; 223 | 224 | assert!(res.write_slice(&codec::usize_to_array(m.len())) == 4); 225 | assert!(res.write_slice(m) == m.len()); 226 | 227 | debug!("adding res to sc.res_bufs: {:?}", res.bytes()); 228 | 229 | sc.res_remaining += res.bytes().len(); 230 | sc.res_bufs.push(res.flip()); 231 | 232 | sc.interest.insert(EventSet::writable()); 233 | 234 | event_loop.reregister(&sc.sock, 235 | tok, 236 | sc.interest, 237 | PollOpt::edge() | PollOpt::oneshot()); 238 | } 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /test/cluster.rs: -------------------------------------------------------------------------------- 1 | extern crate bytes; 2 | extern crate rand; 3 | extern crate mio; 4 | extern crate uuid; 5 | 6 | use std::collections::BTreeMap; 7 | use std::fs; 8 | use std::net::{SocketAddr, SocketAddrV4, Ipv4Addr}; 9 | use std::sync::{Arc, Mutex}; 10 | use std::sync::mpsc::{self, Sender, Receiver, SendError}; 11 | 12 | use self::rand::{StdRng, SeedableRng, Rng}; 13 | use 
self::bytes::{Buf, ByteBuf}; 14 | use self::mio::Token; 15 | use rasputin::server::rocksdb as db; 16 | use rasputin::server::{Server, Envelope, State, Peer, InMemoryLog, 17 | LEADER_DURATION, PEER_BROADCAST}; 18 | use rasputin::{Clock, TestClock, Mutation}; 19 | use self::uuid::Uuid; 20 | 21 | // SimCluster facilitates testing a cluster against network failures. 22 | // This is accomplished by dropping messages, delaying messages, and randomizing 23 | // which surviving ready messages are chosen in which order (but surviving 24 | // messages between the same two nodes preserve ordering, because we use a 25 | // single tcp connection for now) 26 | 27 | enum Condition { 28 | Partition { node1: u16, node2: u16 }, 29 | Paused { node: u16 } 30 | } 31 | 32 | enum Event { 33 | Cron { node: u16 }, 34 | Receive { to: SocketAddr, env: Envelope }, 35 | } 36 | 37 | pub struct SimServer { 38 | path: String, 39 | pub server: Server>>, 40 | clock: Arc, 41 | outbound: Receiver, 42 | pub tok: Token, 43 | addr: SocketAddr, 44 | } 45 | 46 | pub struct SimCluster { 47 | rng: StdRng, 48 | clock: u64, // elapsed time in ms 49 | events: BTreeMap>, // times to events 50 | pub nodes: BTreeMap, 51 | filters: Vec, 52 | } 53 | 54 | impl SimCluster { 55 | pub fn new(dir: &str, num_nodes: u16) -> SimCluster { 56 | let mut logs = vec![]; 57 | for i in 0..num_nodes as usize { 58 | logs.push(InMemoryLog { 59 | pending: BTreeMap::new(), 60 | committed: BTreeMap::new(), 61 | quorum: num_nodes as usize / 2 + 1, 62 | last_learned_txid: 0, 63 | last_learned_term: 0, 64 | last_accepted_txid: 0, 65 | last_accepted_term: 0, 66 | }); 67 | } 68 | SimCluster::new_from_logs(dir, logs) 69 | } 70 | 71 | pub fn new_from_logs(dir: &str, logs: Vec>) -> SimCluster { 72 | let mut peers = vec![]; 73 | let mut peer_strings = vec![]; 74 | for i in 0..logs.len() { 75 | let ip = Ipv4Addr::new(1, 0, (i / 256) as u8, (i % 256) as u8); 76 | let port = i as u16; 77 | peers.push(SocketAddrV4::new(ip, port)); 78 | 
peer_strings.push(format!("{}:{}", ip, port)); 79 | } 80 | 81 | let mut nodes = BTreeMap::new(); 82 | 83 | let mut toks = 0; 84 | for (peer, rep_log) in peers.iter().zip(logs) { 85 | let (tx, rx) = mpsc::channel(); 86 | 87 | let clock = Arc::new(TestClock::new()); 88 | 89 | let state_dir = format!("_rasputin_test/{}/sim_{}", 90 | dir, peer.port()); 91 | let server = Server { 92 | clock: clock.clone(), 93 | peer_port: peer.port(), 94 | cli_port: 65535 - peer.port(), 95 | id: Uuid::new_v4().to_string(), 96 | rpc_tx: Box::new(tx), 97 | max_generated_txid: 0, 98 | highest_term: 0, 99 | state: State::Init, 100 | db: db::new(state_dir.clone()), 101 | rep_log: Box::new(rep_log), 102 | peers: peer_strings.clone(), 103 | rep_peers: BTreeMap::new(), 104 | pending: BTreeMap::new(), 105 | }; 106 | 107 | nodes.insert(peer.port(), SimServer { 108 | path: state_dir.to_string(), 109 | server: server, 110 | addr: SocketAddr::V4(SocketAddrV4::new(*peer.ip(), peer.port())), 111 | clock: clock.clone(), 112 | outbound: rx, 113 | tok: Token(toks), 114 | }); 115 | 116 | toks += 1; 117 | } 118 | 119 | let seed: &[_] = &[0]; 120 | let mut ns = SimCluster{ 121 | rng: SeedableRng::from_seed(seed), 122 | clock: 0, 123 | events: BTreeMap::new(), 124 | nodes: nodes, 125 | filters: vec![], 126 | }; 127 | 128 | // fire up the servers by queuing their cron 129 | for i in 0..ns.nodes.len() { 130 | let time = ns.rng.gen_range(400,500); 131 | ns.push_event( 132 | time, 133 | Event::Cron{ node: i as u16 } 134 | ); 135 | } 136 | ns 137 | } 138 | 139 | pub fn leaders(&self) -> Vec { 140 | self.nodes.iter() 141 | .filter(|&(id, n)| n.server.state.is_leader()) 142 | .map(|(id, n)| *id).collect() 143 | } 144 | 145 | pub fn pause_node(&mut self, node: u16) -> Result<(), ()> { 146 | // TODO 147 | Err(()) 148 | } 149 | 150 | pub fn unpause_node(&mut self, node: u16) -> Result<(), ()> { 151 | // TODO 152 | Err(()) 153 | } 154 | 155 | pub fn partition_two_nodes(&mut self, node1: u16, node2: u16) -> Result<(), 
()> { 156 | // TODO 157 | Err(()) 158 | } 159 | 160 | pub fn unpartition_two_nodes(&mut self, node1: u16, node2: u16) -> Result<(), ()> { 161 | // TODO 162 | Err(()) 163 | } 164 | 165 | pub fn partition_all(&mut self) { 166 | } 167 | 168 | pub fn unpartition_all(&mut self) { 169 | } 170 | 171 | pub fn advance_time(&mut self, ms: u64) { 172 | self.clock += ms; 173 | for (_, node) in self.nodes.iter_mut() { 174 | node.clock.sleep_ms(ms as u32); 175 | } 176 | } 177 | 178 | fn push_event(&mut self, time: u64, event: Event) { 179 | match self.events.get_mut(&time) { 180 | Some(event_vec) => { 181 | event_vec.push(event); 182 | return; 183 | }, 184 | None => (), 185 | }; 186 | self.events.insert(time, vec![event]); 187 | } 188 | 189 | fn pop_event(&mut self) -> (u64, Option>) { 190 | let next_key = self.events.keys().next().unwrap().clone(); 191 | (next_key, self.events.remove(&next_key)) 192 | } 193 | 194 | // step works in two phases: 195 | // 1. handle queued events 196 | // 2. queue rpc's generated in response to those events 197 | pub fn step(&mut self) { 198 | let (time, events) = self.pop_event(); 199 | // move everyone's clocks forward 200 | let before = self.clock.clone(); 201 | self.advance_time(time - before); 202 | let after = self.clock.clone(); 203 | 204 | // Perform event 205 | for event in events.unwrap() { 206 | match event { 207 | Event::Cron{node:node} => { 208 | self.nodes.get_mut(&node).unwrap().server.cron(); 209 | let time = self.rng.gen_range(400,500); 210 | self.push_event( 211 | after + time, 212 | Event::Cron{ node: node } 213 | ); 214 | }, 215 | Event::Receive{to:to, env:env} => { 216 | let node = self.nodes.get_mut(&to.port()).unwrap(); 217 | node.server.handle_peer(env); 218 | }, 219 | } 220 | } 221 | 222 | // Queue up any outbound messages 223 | let mut outbound = vec![]; 224 | for (ip, node) in self.nodes.iter_mut() { 225 | loop { 226 | match node.outbound.try_recv() { 227 | Ok(env) => outbound.push((node.addr, env)), 228 | Err(_) => 
break, // nothing to send 229 | } 230 | } 231 | } 232 | // TODO(tyler) apply filters and node selection randomization 233 | for (addr, env) in outbound { 234 | let env_with_return_address = Envelope { 235 | address: Some(addr), 236 | tok: Token(addr.port() as usize), 237 | msg: ByteBuf::from_slice(env.msg.bytes()), 238 | }; 239 | if env.address.is_none() { 240 | // this is a peer broadcast, which will be attempted to be sent 241 | // to all connected peers. 242 | let ports = self.nodes.len(); 243 | for port in 0..ports { 244 | let arrival = self.clock + 1; 245 | self.push_event(arrival, Event::Receive { 246 | to: u16_to_socketaddr(port as u16), 247 | env: env_with_return_address.clone(), 248 | }); 249 | } 250 | } else { 251 | let arrival = self.clock + 1; 252 | self.push_event(arrival, Event::Receive { 253 | to: u16_to_socketaddr(env.tok.as_usize() as u16), 254 | env: env_with_return_address, 255 | }); 256 | } 257 | } 258 | } 259 | } 260 | 261 | impl Drop for SimServer { 262 | fn drop(&mut self) { 263 | // TODO(tyler) implement this in rocksdb lib 264 | // self.server.db.delete(); 265 | fs::remove_dir_all(&self.path); 266 | } 267 | } 268 | 269 | fn u16_to_socketaddr(from: u16) -> SocketAddr { 270 | let ip = Ipv4Addr::new(1, 0, (from / 256) as u8, (from % 256) as u8); 271 | SocketAddr::V4(SocketAddrV4::new(ip, from)) 272 | } 273 | -------------------------------------------------------------------------------- /test/test.rs: -------------------------------------------------------------------------------- 1 | extern crate rasputin; 2 | 3 | mod cluster; 4 | mod test_paxos; 5 | mod test_client; 6 | -------------------------------------------------------------------------------- /test/test_client.rs: -------------------------------------------------------------------------------- 1 | extern crate log; 2 | use std::sync::mpsc::SendError; 3 | use std::thread; 4 | use std::process; 5 | 6 | use rasputin::Client; 7 | use rasputin::server::Server; 8 | use rasputin::logging; 9 
| use rasputin::server::{Envelope, LEADER_DURATION, PEER_BROADCAST, State}; 10 | use rasputin::RealClock; 11 | use cluster::{SimCluster, SimServer}; 12 | use self::log::LogLevel; 13 | 14 | #[test] 15 | fn client() { 16 | //logging::init_logger(None, LogLevel::Info).unwrap(); 17 | 18 | thread::spawn( move || { 19 | Server::>>::run( 20 | 29999, 21 | 39999, 22 | "_test_client".to_string(), 23 | vec!["127.0.0.1:29999".to_string()] 24 | ); 25 | }); 26 | 27 | thread::sleep_ms(1000); 28 | let peers = vec!["127.0.0.1:39999".parse().unwrap()]; 29 | let nthreads = 1; 30 | let mut cli = Client::new(peers, nthreads); 31 | cli.set(b"k1", b"v1").unwrap(); 32 | assert!(cli.get(b"k1").unwrap().get_value() == b"v1"); 33 | assert!(cli.cas(b"k1", b"v1", b"v12").unwrap().get_value() == b"v12"); 34 | assert!(cli.cas(b"k1", b"vNever", b"vNever2").unwrap().get_value() == b"v12"); 35 | assert!(cli.cas(b"k1", b"vNever", b"vNever2").unwrap().get_success() == false); 36 | assert!(cli.cas(b"k1", b"v12", b"v13").unwrap().get_value() == b"v13"); 37 | assert!(cli.del(b"k1").unwrap().get_value() == b"v13"); 38 | assert!(cli.get(b"k1").unwrap().get_success() == false); 39 | } 40 | -------------------------------------------------------------------------------- /test/test_paxos.rs: -------------------------------------------------------------------------------- 1 | extern crate log; 2 | extern crate quickcheck; 3 | 4 | use std::collections::BTreeMap; 5 | 6 | use rasputin::server::Server; 7 | use rasputin::logging; 8 | use cluster::{SimCluster, SimServer}; 9 | use self::log::LogLevel; 10 | 11 | /* 12 | * Correctness Properties: (Ongaro '14) 13 | * 1. Election Safety: at most one leader can be elected in a given term. 14 | * 2. Leader Append-Only: a leader never overwrites or deletes entries in its 15 | * log; it only appends new entries. 16 | * 3. Log Matching: if two logs contain an entry with the same index and term, 17 | * then the logs are identical in all entries up through the given index. 
18 | * 4. Leader Completeness: if a log entry is committed in a given term, then 19 | * that entry will be present in the logs of the leaders for all 20 | * higher-numbered terms. 21 | * 5. State Machine Safety: if a server has applied a log entry at a given index 22 | * to its state machine, no other server will ever apply a different log 23 | * entry for the same index. 24 | */ 25 | 26 | #[test] 27 | fn election_safety() { 28 | //logging::init_logger(None, LogLevel::Debug).unwrap(); 29 | let mut sim = SimCluster::new("safety", 5); 30 | let mut leaders = BTreeMap::new(); 31 | for i in 0..3000 { 32 | sim.step(); 33 | for (id, n) in sim.nodes.iter() { 34 | if n.server.state.is_leader() { 35 | let term = n.server.state.term().unwrap(); 36 | let tok = n.tok.as_usize(); 37 | assert!(*leaders.entry(term).or_insert(tok) == tok); 38 | } 39 | } 40 | } 41 | } 42 | 43 | #[test] 44 | fn stable_leader_with_no_faults() { 45 | let mut sim = SimCluster::new("stable", 5); 46 | let mut leader = None; 47 | for i in 0..3000 { 48 | sim.step(); 49 | for (id, n) in sim.nodes.iter() { 50 | match n.server.state.term() { 51 | Some(term) => { 52 | if leader.is_none() && n.server.state.is_leader() { 53 | leader = Some(term); 54 | } else if n.server.state.is_leader() { 55 | assert!(leader.unwrap() == term); 56 | } 57 | }, 58 | None => { 59 | // If there's no term, make sure leader was not previously 60 | // elected. 61 | assert!(leader.is_none()); 62 | }, 63 | } 64 | } 65 | } 66 | } 67 | 68 | #[test] 69 | fn leader_append_only() { 70 | 71 | } 72 | 73 | #[test] 74 | fn log_matching() { 75 | 76 | } 77 | 78 | #[test] 79 | fn leader_completeness() { 80 | 81 | } 82 | 83 | #[test] 84 | fn state_machine_safety() { 85 | 86 | } 87 | --------------------------------------------------------------------------------