├── .gitignore
├── LICENSE
├── README.md
├── doc
    └── intro.md
├── procedures
    ├── .gitignore
    ├── DirtyReadStrongRead.java
    ├── ExportWrite.java
    ├── MultiTxn.java
    ├── RRegisterUpsert.java
    └── SRegisterStrongRead.java
├── project.clj
├── replication-model
    ├── .gitignore
    ├── LICENSE
    ├── README.md
    ├── doc
    │   └── intro.md
    ├── project.clj
    ├── src
    │   └── replication_model
    │   │   └── core.clj
    └── test
    │   └── replication_model
    │       └── core_test.clj
├── resources
    └── log4j.properties
├── src
    └── jepsen
    │   ├── voltdb.clj
    │   └── voltdb
    │       ├── client.clj
    │       ├── dirty_read.clj
    │       ├── export.clj
    │       ├── multi.clj
    │       ├── nemesis.clj
    │       ├── perf.clj
    │       ├── redundant_register.clj
    │       ├── runner.clj
    │       └── single.clj
├── test
    └── jepsen
    │   ├── voltdb
    │       ├── dirty_read_test.clj
    │       ├── multi_test.clj
    │       ├── perf_test.clj
    │       ├── redundant_register_test.clj
    │       └── single_test.clj
    │   └── voltdb_test.clj
└── writeup
    ├── VoltDB1.pdf
    ├── bugs
    ├── hstore-endofera.pdf
    └── thanks


/.gitignore:
--------------------------------------------------------------------------------
 1 | .lein-repl-history
 2 | case-studies
 3 | roadmap.txt
 4 | log/**
 5 | pom.xml
 6 | pom.xml.asc
 7 | repl-port
 8 | *~
 9 | .nrepl-port
10 | .*.swp
11 | *.log
12 | lib
13 | classes
14 | build
15 | .lein-deps-sum
16 | .lein-failures
17 | target/**
18 | checkouts/**
19 | store/**
20 | writeup/notes
21 | research
22 | tarball.url
23 | license.xml
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  4 | 
  5 | 1. DEFINITIONS
  6 | 
  7 | "Contribution" means:
  8 | 
  9 | a) in the case of the initial Contributor, the initial code and
 10 | documentation distributed under this Agreement, and
 11 | 
 12 | b) in the case of each subsequent Contributor:
 13 | 
 14 | i) changes to the Program, and
 15 | 
 16 | ii) additions to the Program;
 17 | 
 18 | where such changes and/or additions to the Program originate from and are
 19 | distributed by that particular Contributor. A Contribution 'originates' from
 20 | a Contributor if it was added to the Program by such Contributor itself or
 21 | anyone acting on such Contributor's behalf. Contributions do not include
 22 | additions to the Program which: (i) are separate modules of software
 23 | distributed in conjunction with the Program under their own license
 24 | agreement, and (ii) are not derivative works of the Program.
 25 | 
 26 | "Contributor" means any person or entity that distributes the Program.
 27 | 
 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 29 | necessarily infringed by the use or sale of its Contribution alone or when
 30 | combined with the Program.
 31 | 
 32 | "Program" means the Contributions distributed in accordance with this
 33 | Agreement.
 34 | 
 35 | "Recipient" means anyone who receives the Program under this Agreement,
 36 | including all Contributors.
 37 | 
 38 | 2. GRANT OF RIGHTS
 39 | 
 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
 42 | reproduce, prepare derivative works of, publicly display, publicly perform,
 43 | distribute and sublicense the Contribution of such Contributor, if any, and
 44 | such derivative works, in source code and object code form.
 45 | 
 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
 49 | transfer the Contribution of such Contributor, if any, in source code and
 50 | object code form.  This patent license shall apply to the combination of the
 51 | Contribution and the Program if, at the time the Contribution is added by the
 52 | Contributor, such addition of the Contribution causes such combination to be
 53 | covered by the Licensed Patents. The patent license shall not apply to any
 54 | other combinations which include the Contribution. No hardware per se is
 55 | licensed hereunder.
 56 | 
 57 | c) Recipient understands that although each Contributor grants the licenses
 58 | to its Contributions set forth herein, no assurances are provided by any
 59 | Contributor that the Program does not infringe the patent or other
 60 | intellectual property rights of any other entity. Each Contributor disclaims
 61 | any liability to Recipient for claims brought by any other entity based on
 62 | infringement of intellectual property rights or otherwise. As a condition to
 63 | exercising the rights and licenses granted hereunder, each Recipient hereby
 64 | assumes sole responsibility to secure any other intellectual property rights
 65 | needed, if any. For example, if a third party patent license is required to
 66 | allow Recipient to distribute the Program, it is Recipient's responsibility
 67 | to acquire that license before distributing the Program.
 68 | 
 69 | d) Each Contributor represents that to its knowledge it has sufficient
 70 | copyright rights in its Contribution, if any, to grant the copyright license
 71 | set forth in this Agreement.
 72 | 
 73 | 3. REQUIREMENTS
 74 | 
 75 | A Contributor may choose to distribute the Program in object code form under
 76 | its own license agreement, provided that:
 77 | 
 78 | a) it complies with the terms and conditions of this Agreement; and
 79 | 
 80 | b) its license agreement:
 81 | 
 82 | i) effectively disclaims on behalf of all Contributors all warranties and
 83 | conditions, express and implied, including warranties or conditions of title
 84 | and non-infringement, and implied warranties or conditions of merchantability
 85 | and fitness for a particular purpose;
 86 | 
 87 | ii) effectively excludes on behalf of all Contributors all liability for
 88 | damages, including direct, indirect, special, incidental and consequential
 89 | damages, such as lost profits;
 90 | 
 91 | iii) states that any provisions which differ from this Agreement are offered
 92 | by that Contributor alone and not by any other party; and
 93 | 
 94 | iv) states that source code for the Program is available from such
 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
 96 | or through a medium customarily used for software exchange.
 97 | 
 98 | When the Program is made available in source code form:
 99 | 
100 | a) it must be made available under this Agreement; and
101 | 
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 | 
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 | 
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 | 
111 | 4. COMMERCIAL DISTRIBUTION
112 | 
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering.  The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor tocontrol, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 | 
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 | 
144 | 5. NO WARRANTY
145 | 
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 | 
157 | 6. DISCLAIMER OF LIABILITY
158 | 
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 | 
168 | 7. GENERAL
169 | 
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 | 
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 | 
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 | 
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 | 
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # jepsen.voltdb
 2 | 
 3 | Jepsen tests for VoltDB.
 4 | 
 5 | ## Quickstart
 6 | 
 7 | You'll need a VoltDB tarball to install, either as a local file or an HTTP URL.
 8 | You'll also need a license file in this directory, by default called `license.xml`.
 9 | 
10 | To run a single-key linearizability test with network partitions:
11 | 
12 | ```
13 | lein run test --tarball file://voltdb-ent-12.3.1.tar.gz -w single --nemesis partition
14 | ```
15 | 
16 | To run a full suite of tests with different workloads and nemeses:
17 | 
18 | ```
19 | lein run test-all --tarball file://voltdb-ent-12.3.1.tar.gz
20 | ```
21 | 
22 | To re-analyze a specific test with the current code:
23 | 
24 | ```
25 | lein run analyze store/whatever/some-timestamp/test.jepsen
26 | ```
27 | 
28 | To run a web server for browsing results in store/:
29 | 
30 | ```
31 | lein run serve
32 | ```
33 | 
34 | To build a fat jar that does all of the above:
35 | 
36 | ```
37 | lein uberjar
38 | ```
39 | 
40 | ## Usage
41 | 
42 | ```
43 | lein run test --help
44 | ```
45 | 
46 | ## Running
47 | 
48 | You may need to disable transparent huge pages on DB nodes (or, if running on LXC, on the host OS):
49 | 
50 | ```
51 | sudo bash -c "echo never > /sys/kernel/mm/transparent_hugepage/enabled"
52 | sudo bash -c "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
53 | ```
54 | 
55 | ## License
56 | 
57 | Copyright © 2016 Jepsen, LLC
58 | 
59 | Distributed under the Eclipse Public License either version 1.0 or (at
60 | your option) any later version.
61 | 
62 | ## Jepsen Maven repo
63 | https://mvnrepository.com/artifact/jepsen/jepsen?repo=clojars
64 | 
65 | ## Jepsen VoltDB tests on GitHub
66 | https://github.com/jepsen-io/voltdb
67 | 
68 | ## Clojure downloads (older releases)
69 | https://clojure.org/releases/downloads_older
70 | 


--------------------------------------------------------------------------------
/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to jepsen.voltdb
2 | 
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 | 


--------------------------------------------------------------------------------
/procedures/.gitignore:
--------------------------------------------------------------------------------
1 | *.jar
2 | *.class
3 | 


--------------------------------------------------------------------------------
/procedures/DirtyReadStrongRead.java:
--------------------------------------------------------------------------------
 1 | package jepsen.procedures;
 2 | 
 3 | import org.voltdb.*;
 4 | // Stored procedure that returns every row of dirty_reads, ordered by id.
 5 | public class DirtyReadStrongRead extends VoltProcedure {
 6 |   // Never queued by run(); declared only so that VoltDB's static analyzer
 7 |   // flags this procedure as a write.
 8 |   public final SQLStmt insert = new SQLStmt("INSERT INTO dirty_reads (id) VALUES (-1)");
 9 |   // The only statement run() queues: selects all rows of dirty_reads in ascending id order.
10 |   public final SQLStmt read = new SQLStmt("SELECT * FROM dirty_reads ORDER BY id ASC;");
11 |   // Queues the read and returns the result tables from executing that batch.
12 |   public VoltTable[] run() throws VoltAbortException {
13 |     voltQueueSQL(read);
14 |     return voltExecuteSQL();
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/procedures/ExportWrite.java:
--------------------------------------------------------------------------------
 1 | package jepsen.procedures;
 2 | 
 3 | import org.voltdb.*;
 4 | 
 5 | // Takes a partition key and an array of longs to insert, and inserts each as a
 6 | // row into both a table and a stream.
 7 | public class ExportWrite extends VoltProcedure {
 8 |   public final SQLStmt writeTable = new SQLStmt("INSERT INTO export_table (part, value) VALUES (?, ?);");
 9 |   public final SQLStmt writeStream = new SQLStmt("INSERT INTO export_stream (part, value) VALUES (?, ?);");
10 |   // For each element, queues one insert into export_table and one into
11 |   // export_stream (which of the two comes first is chosen at random), then runs the batch.
12 |   // NOTE(review): Math.random() is not a seeded RNG; confirm the random ordering is intended.
13 |   public long run(int part, long[] elements) {
14 |     for (int i = 0; i < elements.length; i++) {
15 |       if (Math.random() < 0.5) {
16 |         voltQueueSQL(writeTable, part, elements[i]);
17 |         voltQueueSQL(writeStream, part, elements[i]);
18 |       } else {
19 |         voltQueueSQL(writeStream, part, elements[i]);
20 |         voltQueueSQL(writeTable, part, elements[i]);
21 |       }
22 |     }
23 |     voltExecuteSQL(true); // presumably `true` marks this as the final batch; confirm against the VoltProcedure API
24 |     return 0; // the return value carries no information: it is always 0
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/procedures/MultiTxn.java:
--------------------------------------------------------------------------------
 1 | package jepsen.procedures;
 2 | 
 3 | import org.voltdb.*;
 4 | 
 5 | // Processes arbitrary r/w k/v transactions
 6 | public class MultiTxn extends VoltProcedure {
 7 |   public final SQLStmt write = new SQLStmt("UPDATE multi SET value = ? WHERE system = ? AND key = ?");
 8 |   public final SQLStmt read = new SQLStmt("SELECT * FROM multi WHERE system = ? AND key = ?");
 9 | 
10 |   // Arrays of the function, key, and value for each op in the transaction.
11 |   // We assume string keys and integer values.
12 |   public VoltTable[] run(int system, String[] fs, String[] ks, int[] vs) {
13 |     assert fs.length == ks.length && ks.length == vs.length;
14 | 
15 |     for (int i = 0; i < fs.length; i++) {
16 |       if (fs[i].equals("read")) {
17 |         voltQueueSQL(read, system, ks[i]);
18 |       } else if (fs[i].equals("write")) {
19 |         voltQueueSQL(write, vs[i], system, ks[i]);
20 |       } else {
21 |         throw new IllegalArgumentException("Don't know how to interpret op " + fs[i]);
22 |       }
23 |     }
24 |     return voltExecuteSQL();
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/procedures/RRegisterUpsert.java:
--------------------------------------------------------------------------------
 1 | package jepsen.procedures;
 2 | 
 3 | import org.voltdb.*;
 4 | // Stored procedure that upserts one rregisters row per requested copy, all with the same id and value.
 5 | public class RRegisterUpsert extends VoltProcedure {
 6 |   public final SQLStmt upsert = new SQLStmt("UPSERT INTO rregisters (id, copy, value) VALUES (?, ?, ?);");
 7 |   // Queues an upsert of (id, copy, value) for every element of copies, then executes the batch.
 8 |   public VoltTable[] run(long id, long[] copies, long value)
 9 |       throws VoltAbortException {
10 |     for (long copy : copies) {
11 |       voltQueueSQL(upsert, id, copy, value);
12 |     }
13 |     return voltExecuteSQL();
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/procedures/SRegisterStrongRead.java:
--------------------------------------------------------------------------------
 1 | package jepsen.procedures;
 2 | 
 3 | import org.voltdb.*;
 4 | // Stored procedure that returns the registers row(s) matching a given id.
 5 | public class SRegisterStrongRead extends VoltProcedure {
 6 |   // Never queued by run(); declared only so that VoltDB's static analyzer
 7 |   // flags this procedure as a write.
 8 |   public final SQLStmt insert = new SQLStmt("INSERT INTO registers (id, value) VALUES (-1, -1)");
 9 |   // The only statement run() queues: selects all columns of registers where id matches.
10 |   public final SQLStmt read = new SQLStmt("SELECT * FROM registers WHERE id = ?;");
11 |   // Queues the read for the given id and returns the result tables from executing that batch.
12 |   public VoltTable[] run(int id) throws VoltAbortException {
13 |     voltQueueSQL(read, id);
14 |     return voltExecuteSQL();
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
 1 | (defproject jepsen.voltdb "0.1.0-SNAPSHOT"
 2 |   :description "Jepsen VoltDB tests"
 3 |   :url "https://github.com/jepsen-io/voltdb"
 4 |   :license {:name "Eclipse Public License"
 5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
 6 |   :dependencies [[org.clojure/clojure "1.11.1"]
 7 |                  [jepsen "0.3.3"]
 8 |                  [org.clojure/data.xml "0.0.8"]
 9 |                  [org.voltdb/voltdbclient "12.1.0"]
10 |                  ; The VoltDB client appears to need Netty classes at runtime
11 |                  ; without declaring the dependency itself, so it is added here.
12 |                  [io.netty/netty-all "4.1.94.Final"]
13 |                  ; Possibly also required (left disabled until shown to be needed):
14 |                  ;[io.netty/netty-tcnative-boringssl-static "2.0.56.Final"]
15 |                  ;[io.netty/netty-tcnative-classes "2.0.56.Final"]
16 |                  ]
17 |   :jvm-opts ["-Xmx8g" ; maximum JVM heap size: 8 GB
18 |              "-XX:MaxInlineLevel=32"
19 |              "-server"
20 |              "--add-opens" "java.base/java.lang=ALL-UNNAMED" ; each --add-opens pair allows deep reflection into one java.base package
21 |              "--add-opens" "java.base/sun.nio.ch=ALL-UNNAMED"
22 |              "--add-opens" "java.base/java.net=ALL-UNNAMED"
23 |              "--add-opens" "java.base/java.nio=ALL-UNNAMED"
24 |              "--add-opens" "java.base/sun.net.www.protocol.http=ALL-UNNAMED"
25 |              "--add-opens" "java.base/sun.net.www.protocol.https=ALL-UNNAMED"
26 |              "--add-opens" "java.base/sun.net.www.protocol.file=ALL-UNNAMED"
27 |              "--add-opens" "java.base/sun.net.www.protocol.ftp=ALL-UNNAMED"
28 |              "--add-opens" "java.base/sun.net.www.protocol.jar=ALL-UNNAMED"]
29 |   :main jepsen.voltdb.runner ; namespace whose -main is invoked by `lein run`
30 |   :aot  [jepsen.voltdb.runner
31 |          clojure.tools.logging.impl])
32 | 


--------------------------------------------------------------------------------
/replication-model/.gitignore:
--------------------------------------------------------------------------------
 1 | /target
 2 | /classes
 3 | /checkouts
 4 | pom.xml
 5 | pom.xml.asc
 6 | *.jar
 7 | *.class
 8 | /.lein-*
 9 | /.nrepl-port
10 | .hgignore
11 | .hg/
12 | 


--------------------------------------------------------------------------------
/replication-model/LICENSE:
--------------------------------------------------------------------------------
  1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
  2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
  3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
  4 | 
  5 | 1. DEFINITIONS
  6 | 
  7 | "Contribution" means:
  8 | 
  9 | a) in the case of the initial Contributor, the initial code and
 10 | documentation distributed under this Agreement, and
 11 | 
 12 | b) in the case of each subsequent Contributor:
 13 | 
 14 | i) changes to the Program, and
 15 | 
 16 | ii) additions to the Program;
 17 | 
 18 | where such changes and/or additions to the Program originate from and are
 19 | distributed by that particular Contributor. A Contribution 'originates' from
 20 | a Contributor if it was added to the Program by such Contributor itself or
 21 | anyone acting on such Contributor's behalf. Contributions do not include
 22 | additions to the Program which: (i) are separate modules of software
 23 | distributed in conjunction with the Program under their own license
 24 | agreement, and (ii) are not derivative works of the Program.
 25 | 
 26 | "Contributor" means any person or entity that distributes the Program.
 27 | 
 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
 29 | necessarily infringed by the use or sale of its Contribution alone or when
 30 | combined with the Program.
 31 | 
 32 | "Program" means the Contributions distributed in accordance with this
 33 | Agreement.
 34 | 
 35 | "Recipient" means anyone who receives the Program under this Agreement,
 36 | including all Contributors.
 37 | 
 38 | 2. GRANT OF RIGHTS
 39 | 
 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
 42 | reproduce, prepare derivative works of, publicly display, publicly perform,
 43 | distribute and sublicense the Contribution of such Contributor, if any, and
 44 | such derivative works, in source code and object code form.
 45 | 
 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
 49 | transfer the Contribution of such Contributor, if any, in source code and
 50 | object code form.  This patent license shall apply to the combination of the
 51 | Contribution and the Program if, at the time the Contribution is added by the
 52 | Contributor, such addition of the Contribution causes such combination to be
 53 | covered by the Licensed Patents. The patent license shall not apply to any
 54 | other combinations which include the Contribution. No hardware per se is
 55 | licensed hereunder.
 56 | 
 57 | c) Recipient understands that although each Contributor grants the licenses
 58 | to its Contributions set forth herein, no assurances are provided by any
 59 | Contributor that the Program does not infringe the patent or other
 60 | intellectual property rights of any other entity. Each Contributor disclaims
 61 | any liability to Recipient for claims brought by any other entity based on
 62 | infringement of intellectual property rights or otherwise. As a condition to
 63 | exercising the rights and licenses granted hereunder, each Recipient hereby
 64 | assumes sole responsibility to secure any other intellectual property rights
 65 | needed, if any. For example, if a third party patent license is required to
 66 | allow Recipient to distribute the Program, it is Recipient's responsibility
 67 | to acquire that license before distributing the Program.
 68 | 
 69 | d) Each Contributor represents that to its knowledge it has sufficient
 70 | copyright rights in its Contribution, if any, to grant the copyright license
 71 | set forth in this Agreement.
 72 | 
 73 | 3. REQUIREMENTS
 74 | 
 75 | A Contributor may choose to distribute the Program in object code form under
 76 | its own license agreement, provided that:
 77 | 
 78 | a) it complies with the terms and conditions of this Agreement; and
 79 | 
 80 | b) its license agreement:
 81 | 
 82 | i) effectively disclaims on behalf of all Contributors all warranties and
 83 | conditions, express and implied, including warranties or conditions of title
 84 | and non-infringement, and implied warranties or conditions of merchantability
 85 | and fitness for a particular purpose;
 86 | 
 87 | ii) effectively excludes on behalf of all Contributors all liability for
 88 | damages, including direct, indirect, special, incidental and consequential
 89 | damages, such as lost profits;
 90 | 
 91 | iii) states that any provisions which differ from this Agreement are offered
 92 | by that Contributor alone and not by any other party; and
 93 | 
 94 | iv) states that source code for the Program is available from such
 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
 96 | or through a medium customarily used for software exchange.
 97 | 
 98 | When the Program is made available in source code form:
 99 | 
100 | a) it must be made available under this Agreement; and
101 | 
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 | 
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 | 
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 | 
111 | 4. COMMERCIAL DISTRIBUTION
112 | 
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering.  The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor tocontrol, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 | 
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 | 
144 | 5. NO WARRANTY
145 | 
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement, including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 | 
157 | 6. DISCLAIMER OF LIABILITY
158 | 
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 | 
168 | 7. GENERAL
169 | 
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 | 
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 | 
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 | 
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 | 
210 | This Agreement is governed by the laws of the State of New York and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 | 


--------------------------------------------------------------------------------
/replication-model/README.md:
--------------------------------------------------------------------------------
 1 | # replication-model
 2 | 
 3 | A Clojure model of VoltDB's replication algorithm, explored with randomized runs that look for invariant violations.
 4 | 
 5 | ## Usage
 6 | 
 7 | Run `lein test` to explore random executions of the model and print any invariant violation found.
 8 | 
 9 | ## License
10 | 
11 | Copyright © 2016 FIXME
12 | 
13 | Distributed under the Eclipse Public License either version 1.0 or (at
14 | your option) any later version.
15 | 


--------------------------------------------------------------------------------
/replication-model/doc/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction to replication-model
2 | 
3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
4 | 


--------------------------------------------------------------------------------
/replication-model/project.clj:
--------------------------------------------------------------------------------
1 | (defproject replication-model "0.1.0-SNAPSHOT"
2 |   :description "A model of VoltDB's replication algorithm"
3 |   :url "http://example.com/FIXME" ; TODO: replace with the project's real URL
4 |   :license {:name "Eclipse Public License"
5 |             :url "http://www.eclipse.org/legal/epl-v10.html"}
6 |   :dependencies [[org.clojure/clojure "1.8.0"]])
7 | 


--------------------------------------------------------------------------------
/replication-model/src/replication_model/core.clj:
--------------------------------------------------------------------------------
  1 | (ns replication-model.core
  2 |   "A model of voltdb's replication algorithm"
  3 |   (:require [clojure.set :as set]
  4 |             [clojure.pprint :refer [pprint]]))
  5 | 
  6 | ;; Util
  7 | 
  8 | (defn ->pprint
  9 |   "Debug tap: prints a blank line, desc, then x pretty-printed; returns x."
 10 |   [x desc]
 11 |   (prn) (prn desc)
 12 |   (doto x pprint))
 13 | 
 14 | (defn rand-non-empty-subset
 15 |   "Random subset of coll, as a set, of 1 to (count coll) shuffled elements."
 16 |   [coll]
 17 |   (->> coll shuffle (take (-> coll count rand-int inc)) set))
 18 | 
 19 | ;; Network
 20 | 
 21 | (defn net
 22 |   "Builds a network: a map from every [sender recipient] pair of node-ids
 23 |   (self-pairs included) to an empty FIFO queue standing in for a TCP socket."
 24 |   [node-ids]
 25 |   (let [empty-q clojure.lang.PersistentQueue/EMPTY]
 26 |     (zipmap (for [a node-ids, b node-ids] [a b]) (repeat empty-q))))
 27 | 
 28 | (defn net-nodes
 29 |   "Distinct node ids that appear as the sender of some connection in net."
 30 |   [net]
 31 |   (->> net keys (map first) distinct))
 32 | 
 33 | (defn net-empty?
 34 |   "True when no connection queue in net holds a pending message."
 35 |   [net]
 36 |   (not-any? seq (vals net)))
 37 | 
 38 | (defn send-msg
 39 |   "Enqueues msg on the a->b connection of net, which must exist. Returns net'."
 40 |   [net a b msg]
 41 |   (assert (contains? net [a b]))
 42 |   (update-in net [[a b]] conj msg))
 43 | 
 44 | (defn broadcast
 45 |   "Enqueues msg from node a to every node in bs. Without bs, targets all
 46 |   nodes in net except a. Returns net'."
 47 |   ([net a msg]
 48 |    (broadcast net a (->> net net-nodes (remove #{a})) msg))
 49 |   ([net a bs msg]
 50 |    (let [deliver (fn [acc b] (send-msg acc a b msg))]
 51 |      (reduce deliver net bs))))
 52 | 
 53 | (defn recv-msg
 54 |   "Dequeues one pending message. Given a and b, takes the head of the a->b
 55 |   queue. Given only b, tries senders in random order. Given neither, tries
 56 |   connections in random order. Returns [net' a b msg], or nil if none found."
 57 |   ([net a b]
 58 |    (let [conn    [a b]
 59 |          pending (get net conn)]
 60 |      ; peek is nil for an empty or missing queue, so when-let doubles as the
 61 |      ; emptiness check (a nil or false message would look like no message).
 62 |      (when-let [msg (peek pending)]
 63 |        [(assoc net conn (pop pending)) a b msg])))
 64 |   ([net b]
 65 |    ; Visit senders in random order; the first non-empty queue wins.
 66 |    (let [senders (shuffle (net-nodes net))]
 67 |      (first (keep (fn [a] (recv-msg net a b))
 68 |                   senders))))
 69 |   ([net]
 70 |    ; Same, over every [sender recipient] connection.
 71 |    (let [conns (shuffle (keys net))]
 72 |      (first (keep (fn [[a b]] (recv-msg net a b))
 73 |                   conns)))))
 74 | 
 75 | (defn drop-conn
 76 |   "Empties the a->b queue, discarding its messages. Without a and b, picks
 77 |   each endpoint independently at random, so a may equal b."
 78 |   ([net]
 79 |    (let [ids (net-nodes net), a (rand-nth ids), b (rand-nth ids)]
 80 |      (drop-conn net a b)))
 81 |   ([net a b] (assoc net [a b] clojure.lang.PersistentQueue/EMPTY)))
 82 | 
 83 | ;; State
 84 | 
 85 | (defn node
 86 |   "Initial state of node id in a cluster made up of nodes."
 87 |   [nodes id]
 88 |   (let [members (set nodes)]
 89 |     {:id           id            ; this node's id
 90 |      :alive?       true          ; is the node running?
 91 |      :leader?      false         ; does it act as a leader?
 92 |      :cluster      members       ; ids it currently counts as members
 93 |      :prev-cluster members       ; membership before the last change
 94 |      :applied      (sorted-set)  ; writes applied locally
 95 |      :waiting {} :returns {}}))  ; per op id: nodes awaited / planned return
 96 | 
 97 | (defn state
 98 |   "Initial model state for n nodes, ids 0..n-1; node 0 starts as leader."
 99 |   [n]
100 |   (let [ids   (range n)
101 |         nodes (into {} (map (fn [id] [id (node ids id)])) ids)]
102 |     {:next-op 0
103 |      :nodes   (assoc-in nodes [0 :leader?] true)
104 |      :net     (net ids)
105 |      :written (sorted-set)
106 |      :history []}))
107 | 
108 | (defn rand-node-id
109 |   "Picks the id of a random node in state."
110 |   [state]
111 |   (-> state :nodes keys rand-nth))
112 | 
113 | ;; Invariants
114 | 
115 | (defn stale-reads
116 |   "For each leader missing some value of the state's :written set, yields
117 |   {:node id, :stale missing-values}. Returns nil when no leader is behind.
118 |   Note: dead nodes still flagged :leader? are included."
119 |   [state]
120 |   (let [written (:written state)]
121 |     ; seq turns an empty result into nil, so callers can use this in `or`.
122 |     (seq
123 |       (for [leader (vals (:nodes state))
124 |             :when  (:leader? leader)
125 |             :let   [missing (set/difference written (:applied leader))]
126 |             :when  (seq missing)]
127 |         {:node  (:id leader)
128 |          :stale missing}))))
129 | 
130 | (defn lost-writes
131 |   "Returns {:lost-writes lost} when the state carries a :lost entry, else nil."
132 |   [state]
133 |   (let [lost (:lost state)]
134 |     (when lost {:lost-writes lost})))
135 | 
136 | (defn returns-waiting
137 |   "Checks that each node's :waiting and :returns maps cover the same op ids.
138 |   Returns {:returns-waiting :not-equal} if any node disagrees, else nil."
139 |   [state]
140 |   (let [op-ids       (comp set keys)
141 |         out-of-sync? (fn [node]
142 |                        (not= (op-ids (:waiting node))
143 |                              (op-ids (:returns node))))]
144 |     (when (some out-of-sync? (vals (:nodes state)))
145 |       {:returns-waiting :not-equal})))
146 | 
147 | (defn invariant
148 |   "First violation found among lost-writes and returns-waiting, else nil."
149 |   [state]
150 |   (some (fn [check] (check state))
151 |         [lost-writes returns-waiting]))
152 | 
153 | ;; Operations
154 | 
155 | (defn op
156 |   "Generates a random op for state s, as [op-type value], with even odds:
157 | 
158 |   [:write n]    n is the state's :next-op. Applying it adds n to a node's
159 |                 :applied set; once returned, n joins the state's :written set.
160 | 
161 |   [:check vals] vals is the state's current :written set. Applying it leaves
162 |                 the node untouched and yields the values the node is missing;
163 |                 once returned, any missing values join the state's :lost set."
164 |   [s]
165 |   (let [check? (>= (rand) 0.5)]
166 |     (if check?
167 |       [:check (:written s)]
168 |       [:write (:next-op s)])))
169 | 
170 | (defn apply-op
171 |   "Applies op [type value] to node. Returns the pair [node' return-val]:
172 |   :write adds value to :applied; :check returns values absent from it."
173 |   [node [type value]]
174 |   (case type :write [(update node :applied conj value) value]
175 |              :check [node (set/difference value (:applied node))]))
176 | 
177 | ;; State transitions
178 | 
179 | (defn step-start-op
180 |   "Starts a new op on a random live leader: applies it locally, records which
181 |   peers must ack it and what to return, and broadcasts [:apply op-id op] to
182 |   the rest of the leader's cluster. Returns nil when no live leader exists."
183 |   [state]
184 |   (let [live-leader? (every-pred :leader? :alive?)
185 |         candidates   (filter live-leader? (vals (:nodes state)))]
186 |     ; seq maps an empty selection to nil, which rand-nth passes through.
187 |     (when-let [id (:id (rand-nth (seq candidates)))]
188 |       (let [op-id            (:next-op state)
189 |             new-op           (op state)
190 |             leader           (get-in state [:nodes id])
191 |             peers            (disj (:cluster leader) id)
192 |             [applied retval] (apply-op leader new-op)
193 |             leader'          (-> applied
194 |                                  (assoc-in [:waiting op-id] peers)
195 |                                  (assoc-in [:returns op-id]
196 |                                            [(first new-op) retval]))]
197 |         (-> state
198 |             (assoc :next-op (inc op-id))
199 |             (assoc-in [:nodes id] leader')
200 |             (update :net broadcast id peers [:apply op-id new-op])
201 |             (update :history conj {:step         :start-op
202 |                                    :node         id
203 |                                    :op-id        op-id
204 |                                    :op           new-op
205 |                                    :broadcast-to peers}))))))
206 | 
207 | (defn step-recv-msg
208 |   "Delivers one random pending message. The recipient must be alive and must
209 |   count the sender in its cluster; otherwise, or when nothing is pending,
210 |   returns nil, leaving the state (and the message) as it was.
211 | 
212 |     :apply  applies the op to the recipient and replies with [:ack op-id res]
213 |     :ack    removes the sender from the op's wait set, if one still exists
214 |     :nack   cancels the op, dropping it from :waiting and :returns"
215 |   [s]
216 |   (when-let [[net' from to msg] (recv-msg (:net s))]
217 |     (let [recipient (get-in s [:nodes to])]
218 |       ; Dead nodes, and nodes whose cluster excludes the sender, ignore it.
219 |       (when (and (:alive? recipient) ((:cluster recipient) from))
220 |         (let [[kind op-id] msg
221 |               ; History entry fields shared by all three message kinds.
222 |               entry (fn [step]
223 |                       {:step  step
224 |                        :node  to
225 |                        :from  from
226 |                        :op-id op-id})]
227 |           (condp = kind
228 |             ; Apply locally, then acknowledge to the sender.
229 |             :apply
230 |             (let [[_ _ op]         msg
231 |                   [recipient' res] (apply-op recipient op)]
232 |               (-> s
233 |                   (assoc-in [:nodes to] recipient')
234 |                   (assoc :net (send-msg net' to from [:ack op-id res]))
235 |                   (update :history conj (assoc (entry :apply) :op op))))
236 | 
237 |             ; No wait set means the op is no longer tracked here (e.g. it
238 |             ; already returned), so the ack is only logged as a no-op.
239 |             :ack
240 |             (let [pending (get-in recipient [:waiting op-id])]
241 |               (-> (if pending
242 |                     (update-in s [:nodes to :waiting op-id] disj from)
243 |                     s)
244 |                   (assoc :net net')
245 |                   (update :history conj (assoc (entry :ack)
246 |                                                :noop? (not pending)))))
247 | 
248 |             ; Forget the op entirely; nothing will be returned for it.
249 |             :nack
250 |             (-> s
251 |                 (update-in [:nodes to :waiting] dissoc op-id)
252 |                 (update-in [:nodes to :returns] dissoc op-id)
253 |                 (assoc :net net')
254 |                 (update :history conj (entry :nack)))))))))
255 | 
256 | (defn return-op
257 |   "Returns op-id's planned value from node to the client (the global state)
258 |   and forgets the op on that node, clearing its :waiting and :returns.
259 | 
260 |     [:write v]   v is added to the state's :written set
261 |     [:check vs]  vs, if non-empty, is merged into the state's :lost set"
262 |   [s node op-id]
263 |   (let [id                      (:id node)
264 |         [type value :as return] (get-in node [:returns op-id])
265 |         delivered               (case type
266 |                                   :write (update s :written conj value)
267 |                                   :check (if (seq value)
268 |                                            (update s :lost set/union value)
269 |                                            s))]
270 |     ; Result delivered; now drop the node's bookkeeping and log the return.
271 |     (-> delivered
272 |         (update-in [:nodes id :waiting] dissoc op-id)
273 |         (update-in [:nodes id :returns] dissoc op-id)
274 |         (update :history conj {:step  :return-op, :node   id
275 |                                :op-id op-id,      :return return}))))
276 | 
277 | (defn step-return-op
278 |   "Finds a live node holding an op whose wait set is empty and returns that
279 |   op to the client via return-op. Nodes and ops are tried in random order.
280 |   Returns nil when no op is ready."
281 |   [s]
282 |   (let [ready-op (fn [node]
283 |                    (->> (:waiting node) vec shuffle
284 |                         (keep (fn [[op-id pending]]
285 |                                 (when (empty? pending)
286 |                                   (return-op s node op-id))))
287 |                         first))]
288 |     (->> (:nodes s) vals shuffle
289 |          (filter :alive?)
290 |          (keep ready-op) first)))
291 | 
292 | (defn step-conn-lost
293 |   "Drops one randomly chosen connection's in-flight messages and logs it."
294 |   [s]
295 |   (let [net' (drop-conn (:net s))
296 |         log  (conj (:history s) {:step :conn-lost})]
297 |     (assoc s :net net' :history log)))
298 | 
299 | (defn apply-cluster-change
300 |   "Applies a cluster change to node, provided the node's current cluster is
301 |   the one the change starts from; otherwise returns nil. The node adopts
302 |   :cluster', remembers its old cluster, and leads iff named as :leader."
303 |   [node change]
304 |   (let [current (:cluster node)]
305 |     (when (= current (:cluster change))
306 |       (merge node {:leader?      (= (:id node) (:leader change))
307 |                    :prev-cluster current, :cluster (:cluster' change)}))))
308 | 
309 | (defn step-resolve-fault
310 |   "Takes a random alive node which believes itself to be part of its cluster,
311 |   and the alive peers which share its belief about the cluster. Declares at
312 |   least one of those peers dead, forming a new cluster c', and has every
313 |   node whose id is in c' adopt c' (keeping its old cluster as :prev-cluster).
314 | 
315 |   If c' contains a live leader, one of them is kept as leader'; otherwise a
316 |   random member of c' is promoted. Returns nil when no node qualifies or the
317 |   chosen node has no agreeing live peer."
318 |   [s]
319 |   (when-let [node (->> s :nodes vals
320 |                        (filter :alive?)
321 |                        (filter #(contains? (:cluster %) (:id %)))
322 |                        seq
323 |                        rand-nth)]
324 |     (let [c (:cluster node)
325 |           ; In establishing consensus for the new set, we're going to
326 |           ; deal only with live nodes--won't even try to talk to or include
327 |           ; dead ones. We also exclude any nodes which disagree on our
328 |           ; current cluster.
329 |           candidates (->> (disj c (:id node))
330 |                           (map (:nodes s))
331 |                           (filter :alive?)
332 |                           (filter #(= c (:cluster %)))
333 |                           (map :id)
334 |                           set)]
335 |       (when (seq candidates)
336 |         ; Declare a random non-empty subset of the agreeing peers dead.
337 |         (let [dead    (rand-non-empty-subset candidates)
338 |               c'      (set/difference c dead)
339 |               ; Iterate over node maps (vals), not [id node] map entries:
340 |               ; keyword lookups on an entry are always nil.
341 |               leaders (->> (:nodes s)
342 |                            vals
343 |                            (filter :leader?)
344 |                            (filter :alive?)
345 |                            (map :id)
346 |                            set)
347 |               leader' (or (some c' leaders)
348 |                           (->> c' seq rand-nth))]
349 |           (-> s
350 |               ; Every member of c' adopts the new membership in one step.
351 |               (assoc :nodes (->> (:nodes s)
352 |                                  (map (fn [[id n]]
353 |                                         (if-not (c' id)
354 |                                           ; Not part of the new cluster; skip
355 |                                           [id n]
356 |                                           ; Part of the new cluster; update
357 |                                           (let [n (if (= id leader')
358 |                                                     (assoc n :leader? true)
359 |                                                     n)]
360 |                                             [id (assoc n
361 |                                                        :prev-cluster
362 |                                                        (:cluster n)
363 |                                                        :cluster c')]))))
364 |                                  (into {})))
365 |               (update :history conj {:step      :resolve-fault
366 |                                      :node      (:id node)
367 |                                      :dead      dead
368 |                                      :cluster   c
369 |                                      :cluster'  c'
370 |                                      :leaders   leaders
371 |                                      :leader'   leader'})))))))
372 | 
373 | (defn step-detect-partition
374 |   "Kills node id unless its cluster is a majority of its :prev-cluster, or
375 |   exactly half of it with id being the blessed node 0. A killed node stops
376 |   leading and forgets its :waiting and :returns. With no id, checks a random
377 |   live node. Returns nil if nothing changes."
378 |   ([s]
379 |    ; vals, so :alive? sees node maps rather than [id node] entries; seq, so
380 |    ; an empty selection becomes nil instead of making rand-nth throw.
381 |    (when-let [node-id (->> s :nodes vals (filter :alive?) seq rand-nth :id)]
382 |      (step-detect-partition s node-id)))
383 |   ([s id]
384 |    (let [node (get-in s [:nodes id])
385 |          frac (/ (count (:cluster node)) (count (:prev-cluster node)))
386 |          maj? (or (< 1/2 frac)
387 |                   (and (= 1/2 frac) (= 0 id)))]
388 |      (when (and (not maj?) (:alive? node))
389 |        (-> s
390 |            (assoc-in [:nodes id :alive?] false)
391 |            (assoc-in [:nodes id :leader?] false) ; the flag leader filters read
392 |            (assoc-in [:nodes id :waiting] {})
393 |            (assoc-in [:nodes id :returns] {})
394 |            (update :history conj {:step :detect-partition, :node id}))))))
395 | 
396 | (defn step-clear-waiting
397 |   "Trims node id's wait sets down to nodes still in its cluster, so ops stop
398 |   waiting on acks from nodes that have left. Meant to follow fault
399 |   resolution and partition detection. Returns nil if nothing changes."
400 |   ([s id]
401 |    (let [{:keys [waiting cluster]} (get-in s [:nodes id])
402 |          ; Keep, per op, only the awaited nodes that are still members.
403 |          in-cluster (fn [[op nodes]]
404 |                       [op (set/intersection nodes cluster)])
405 |          waiting'   (into {} (map in-cluster) waiting)]
406 |      ; Only report a transition when the wait sets actually changed.
407 |      (when-not (= waiting waiting')
408 |        (-> s
409 |            (assoc-in [:nodes id :waiting] waiting')
410 |            (update :history conj {:step     :clear-waiting
411 |                                   :node     id
412 |                                   :waiting  waiting
413 |                                   :waiting' waiting'}))))))
414 | 
415 | (defn on-live
416 |   "Threads state through (step state node-id) for each node alive in the
417 |   initial state. A step returning nil leaves the state as it was."
418 |   [state step]
419 |   (loop [st state, ids (->> state :nodes vals (filter :alive?) (map :id) seq)]
420 |     (if-let [[id & more] ids]
421 |       (recur (or (step st id) st) more)
422 |       st)))
423 | 
424 | (defn step
425 |   "One random state transition. Always returns any op ready to return first;
426 |   otherwise rolls, in order, for a dropped connection (1%), a fault
427 |   resolution round (10%), and a message delivery (90%), then tries to start
428 |   a new op. Falls back to the unchanged state when nothing applies."
429 |   [state]
430 |   (let [chance? (fn [p] (< (rand) p))]
431 |     (or (step-return-op state)
432 |         (and (chance? 0.01) (step-conn-lost state))
433 |         ; Atomic process: fault resolution on any node, then a full round of
434 |         ; partition detection, then a full round of clear-waiting.
435 |         (and (chance? 0.1)
436 |              (-> (step-resolve-fault state)
437 |                  (on-live step-detect-partition)
438 |                  (on-live step-clear-waiting)))
439 |         (and (chance? 0.9) (step-recv-msg state))
440 |         (step-start-op state) state)))
441 | 
442 | (defn bad-history
443 |   "Applies pred to each state in order and stops at the first one for which
444 |   it returns an error. Yields {:states [...] :error ...}, where :states is
445 |   the prefix up to and including that state, or nil if pred never fires."
446 |   ([pred states]
447 |    (bad-history pred [] states))
448 |   ([pred passed states]
449 |    (loop [seen passed, pending (seq states)]
450 |      (when-let [[current & more] pending]
451 |        (let [seen' (conj seen current)]
452 |          (if-let [err (pred current)]
453 |            {:states seen', :error err} (recur seen' more)))))))
454 | 
455 | (defn violations
456 |   "Runs about n random explorations of len states each from state, split
457 |   evenly over one future per available processor, and returns the shortest
458 |   history that violates invariant, or nil if none was found."
459 |   [state n len]
460 |   (let [procs    (.availableProcessors (Runtime/getRuntime))
461 |         per-proc (Math/ceil (/ n procs))
462 |         shortest (fn [histories]
463 |                    (first (sort-by (comp count :states) histories)))
464 |         explore  (fn [_]
465 |                    (->> (iterate step state)
466 |                         (take len)
467 |                         (bad-history invariant)))
468 |         worker   (fn [_]
469 |                    (future (shortest (keep explore (range per-proc)))))]
470 |     (->> (range procs)
471 |          (map worker)
472 |          doall           ; start every future before blocking on any
473 |          (map deref)
474 |          (remove nil?) shortest)))
475 | 


--------------------------------------------------------------------------------
/replication-model/test/replication_model/core_test.clj:
--------------------------------------------------------------------------------
 1 | (ns replication-model.core-test
 2 |   (:require [clojure.test :refer :all]
 3 |             [clojure.pprint :refer [pprint]]
 4 |             [replication-model.core :refer :all]))
 5 | 
 6 | (deftest net-nodes-test
 7 |   ; The network of a 3-node state should mention exactly nodes 0, 1 and 2,
 8 |   ; in whatever order net-nodes happens to produce them.
 9 |   (let [net (:net (state 3))
10 |         ids (sort (net-nodes net))]
11 |     (is (= [0 1 2] ids))))
12 | 
13 | (deftest broadcast-test
14 |   ; Node 1 broadcasts :hi: only its queues to nodes 0 and 2 should hold it.
15 |   ; (Queues compare equal to vectors with the same elements.)
16 |   (let [sent (broadcast (:net (state 3)) 1 :hi)]
17 |     (is (= sent {[0 0] []
18 |                  [0 1] []
19 |                  [0 2] []
20 |                  [1 0] [:hi]
21 |                  [1 1] []
22 |                  [1 2] [:hi]
23 |                  [2 0] []
24 |                  [2 1] []
25 |                  [2 2] []}))))
26 | 
27 | (deftest recv-msg-test
28 |   (let [n       (-> (:net (state 3))
29 |                     (send-msg 1 0 :a)
30 |                     (send-msg 2 0 :b)
31 |                     (send-msg 1 0 :c))
32 |         net-of  first   ; recv-msg returns [net' a b msg]
33 |         msg-of  last]
34 |     (testing "ordered messages from 1"
35 |       (let [first-recv (recv-msg n 1 0)]
36 |         (is (= :a (msg-of first-recv)))
37 |         (is (= :c (msg-of (recv-msg (net-of first-recv) 1 0))))))
38 | 
39 |     (testing "no order between queues"
40 |       (is (= :b (msg-of (recv-msg n 2 0)))))
41 | 
42 |     (testing "nil response for no more messages"
43 |       (is (= nil (recv-msg (net-of (recv-msg n 2 0)) 2 0))))
44 | 
45 |     (testing "all messages for 0"
46 |       (let [drain (fn drain [net msgs]
47 |                     (if-let [[net' _ _ msg] (recv-msg net 0)]
48 |                       (recur net' (conj msgs msg))
49 |                       msgs))]
50 |         (is (= #{:a :b :c}
51 |                (drain n #{})))))))
52 | 
53 | (deftest txn-test
54 |   (binding [clojure.pprint/*print-miser-width* 110
55 |             clojure.pprint/*print-right-margin* 110]
56 |     ; Show what a state looks like after 100 random transitions.
57 |     (pprint (nth (iterate step (state 3)) 100))
58 | 
59 |     ; Search 100000 short runs; on failure, dump the offending history.
60 |     (let [v  (violations (state 3) 100000 20)
61 |           vb (boolean v)]
62 |       (when-not (is (not vb))
63 |         (println "Found violation in" (dec (count (:states v)))
64 |                  "transitions:")
65 |         (doseq [s (:states v)] (prn) (pprint s))
66 |         (prn)
67 |         (println "Violation was:")
68 |         (pprint (:error v))))))
69 | 


--------------------------------------------------------------------------------
/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | log4j.rootLogger=INFO, console, file
 2 | 
 3 | log4j.logger.log.voltcore.logging=FATAL
 4 | 
 5 | log4j.appender.file=org.apache.log4j.FileAppender
 6 | log4j.appender.file.File=jepsen.log
 7 | log4j.appender.file.layout=org.apache.log4j.EnhancedPatternLayout
 8 | log4j.appender.file.layout.conversionPattern=%d{ISO8601}{GMT}\t%p\t[%t] %c: %m%n
 9 | 
10 | log4j.appender.console=org.apache.log4j.ConsoleAppender
11 | log4j.appender.console.Target=System.out
12 | log4j.appender.console.layout=org.apache.log4j.EnhancedPatternLayout
13 | log4j.appender.console.layout.conversionPattern=%p\t[%t] %c: %m%n
14 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb
  2 |   "OS and database setup functions, plus some older, currently unused nemeses
  3 |   that should be ported over to voltdb.nemesis."
  4 |   (:require [jepsen [core         :as jepsen]
  5 |              [db           :as db]
  6 |              [control      :as c :refer [|]]
  7 |              [checker      :as checker]
  8 |              [client       :as client]
  9 |              [generator    :as gen]
 10 |              [independent  :as independent]
 11 |              [nemesis      :as nemesis]
 12 |              [net          :as net]
 13 |              [os           :as os]
 14 |              [tests        :as tests]
 15 |              [util         :as util :refer [await-fn meh timeout]]]
 16 |             [jepsen.os.debian     :as debian]
 17 |             [jepsen.control.util  :as cu]
 18 |             [jepsen.control.net   :as cn]
 19 |             [jepsen.voltdb.client :as vc]
 20 |             [knossos.model        :as model]
 21 |             [clojure.data.xml     :as xml]
 22 |             [clojure.string       :as str]
 23 |             [clojure.java.io      :as io]
 24 |             [clojure.java.shell   :refer [sh]]
 25 |             [clojure.pprint :refer [pprint]]
 26 |             [clojure.tools.logging :refer [info warn]]
 27 |             [slingshot.slingshot :refer [try+ throw+]])
 28 |   (:import (org.voltdb VoltTable
 29 |                        VoltType
 30 |                        VoltTableRow)
 31 |            (org.voltdb.client Client
 32 |                               ClientConfig
 33 |                               ClientFactory
 34 |                               ClientResponse
 35 |                               ProcedureCallback)))
 36 | 
 37 | (def username "User that install! ensures exists and makes owner of base-dir." "voltdb")
 38 | (def base-dir "Directory the VoltDB archive is installed into." "/tmp/jepsen-voltdb")
 39 | (def client-port "Presumably VoltDB's client port; confirm against callers." 21212)
 40 | (def export-csv-file
 41 |   "What file do we export CSV data to?" (str base-dir "/export.csv"))
 42 | 
 43 | (defn os
 44 |   "Wraps os: after its setup, installs python3 and a headless JDK 17, and
 45 |   registers python3 as the `python` alternative. Teardown just delegates."
 46 |   [os]
 47 |   (let [packages ["python3" "openjdk-17-jdk-headless"]]
 48 |     (reify os/OS
 49 |       (setup! [_ test node]
 50 |         (os/setup! os test node)
 51 |         (debian/install packages)
 52 |         (c/exec :update-alternatives :--install
 53 |                 "/usr/bin/python" "python" "/usr/bin/python3" 1))
 54 |       (teardown! [_ test node] (os/teardown! os test node)))))
 55 | 
 56 | (defn install!
 57 |   "Unpacks the VoltDB tarball at `url` into base-dir on the current node. A file:// URL is first uploaded from the control node. Afterwards creates base-dir/log and the voltdb user, and gives that user ownership of base-dir."
 58 |   [node url force?]
 59 |   (c/su
 60 |    (let [opts  {:force? force?}
 61 |          local (re-find #"^file://((.*/)?([^/]+))$" url)]
 62 |      (if-not local
 63 |        (cu/install-archive! url base-dir opts) ; probably an HTTP URI; install-archive! handles it
 64 |        (let [[_ path _ filename] local
 65 |              remote-path         (str "/tmp/jepsen/" filename)]
 66 |          (c/exec :mkdir :-p "/tmp/jepsen")
 67 |          (c/upload path remote-path)
 68 |          (cu/install-archive! (str "file://" remote-path) base-dir opts))))
 69 |    (c/exec :mkdir (str base-dir "/log"))
 70 |    (cu/ensure-user! username)
 71 |    (c/exec :chown :-R (str username ":" username) base-dir)
 72 |    (info "VoltDB unpacked")))
 73 | 
 74 | (defn deployment-xml
 75 |   "Renders the contents of deployment.xml for the given test as a string."
 76 |   [test]
 77 |   (let [node-count (count (:nodes test))
 78 |         ; TODO: Make k configurable. (:sitesperhost 2 is left commented out.)
 79 |         cluster    [:cluster {:hostcount node-count
 80 |                               :kfactor   (min 4 (dec node-count))}]
 81 |         paths      [:paths {}
 82 |                     [:voltdbroot {:path base-dir}]]
 83 |         ; We need to choose a heartbeat high enough so that we can spam
 84 |         ; isolated nodes with requests *before* they kill themselves
 85 |         ; but low enough that a new majority is elected and performs
 86 |         ; some operations.
 87 |         heartbeat  [:heartbeat {:timeout 2}] ; seconds
 88 |         ; TODO: consider changing commandlog enabled to false to speed up startup
 89 |         commandlog [:commandlog {:enabled true, :synchronous true, :logsize 128}
 90 |                     [:frequency {:time 2}]] ; milliseconds
 91 |         ; Not exactly sure what these do! Adapted from ghostbuster -- KRK 2023
 92 |         settings   [:systemsettings
 93 |                     [:flushinterval {:minimum 10}
 94 |                      [:export {:interval 10}]]]
 95 |         ; Export configuration, for export tests
 96 |         export     [:export
 97 |                     [:configuration {:enabled true, :target "export_target", :type "file"}
 98 |                      [:property {:name "type"} "csv"]
 99 |                      [:property {:name "nonce"} "export_target_2"]
100 |                      [:property {:name "outdir"} export-csv-file]]]]
101 |     (-> [:deployment {} cluster paths heartbeat commandlog settings export]
102 |         xml/sexp-as-element
103 |         xml/emit-str)))
104 | 
105 | (defn init-db!
106 |   "Runs `voltdb init` in base-dir as the voltdb user, seeding a one-table schema and teeing the output to log/stdout.log."
107 |   [node]
108 |   (info "Initializing voltdb")
109 |   ; We think there's a bug that breaks sqlcmd if it runs early in the creation
110 |   ; of a fresh DB--it'll log "Cannot invoke java.util.Map.values() because
111 |   ; arglists is null". To work around that, we're creating a table so the
112 |   ; schema is nonempty.
113 |   (let [schema-file "init-schema"
114 |         voltdb-bin  (str base-dir "/bin/voltdb")
115 |         schema      "CREATE TABLE work_around_volt_bug (
116 |                                  id int not null
117 |                                  );"]
118 |     (c/sudo username
119 |             (c/cd base-dir
120 |                   (cu/write-file! schema schema-file)
121 |                   (c/exec voltdb-bin
122 |                           :init :-s schema-file
123 |                           :--config (str base-dir "/deployment.xml")
124 |                           | :tee (str base-dir "/log/stdout.log")))))
125 |   (info node "initialized"))
126 | 
127 | (defn configure!
128 |   "As the voltdb user: uploads the test's license, writes deployment.xml, initializes a fresh DB, and symlinks voltdbroot/log/volt.log to log/volt.log."
129 |   [test node]
130 |   (let [in-base (partial str base-dir)]
131 |     (c/sudo username (c/cd base-dir
132 |       (c/upload (:license test) (in-base "/license.xml"))
133 |       (cu/write-file! (deployment-xml test) "deployment.xml")
134 |       (init-db! node)
135 |       (c/exec :ln :-f :-s (in-base "/voltdbroot/log/volt.log") (in-base "/log/volt.log"))))))
136 | 
137 | (defn await-log
138 |   "Intended to block until base-dir/log/volt.log contains the given string. Runs a tail | grep | xargs pipeline as the voltdb user."
139 |   [line]
140 |   (let [file (str base-dir "/log/volt.log")]
141 |     (c/sudo username
142 |             (c/cd base-dir
143 |                   ; There used to be a sleep here of *four minutes*. Why? --KRK
144 |                   (c/exec :tail :-n 20 file
145 |                           | :grep :-m 1 :-f line ; NOTE(review): -f is given to grep here, which reads patterns from a *file* named `line`; was `tail -f` or `grep -F` intended? Confirm.
146 |                           ; What is this xargs FOR? What was I thinking seven
147 |                           ; years ago? --KRK, 2023
148 |                           | :xargs (c/lit (str "echo \"\" >> " file
149 |                                                " \\;")))))))
150 | 
151 | (defn await-start
152 |   "Blocks until client-port accepts TCP connections (via cu/await-tcp-port) and
153 |   a client connected to `node` gets a non-empty @SystemInformation OVERVIEW."
154 |   [node]
155 |   (info "Waiting for" node "to start")
156 |   (cu/await-tcp-port client-port {:timeout 300000, :log-interval 30000})
157 |   (let [conn-opts  {:reconnect?             false
158 |                     :procedure-call-timeout 100}
159 |         await-opts {:timeout        240000
160 |                     :retry-interval 1000
161 |                     :log-interval   10000
162 |                     :log-message    "Waiting for @SystemInformation"}]
163 |     (with-open [conn (vc/connect node conn-opts)]
164 |       ; Per Ruth, just being able to ask for SystemInformation should indicate
165 |       ; the cluster is ready to use. We'll make sure we get at least one table
166 |       ; back, just in case.
167 |       (letfn [(check-system-info []
168 |                 (when (empty? (vc/call! conn "@SystemInformation" "OVERVIEW"))
169 |                   (throw+ {:type ::empty-overview})))]
170 |         (await-fn check-system-info await-opts))))
171 |   (info node "started"))
172 | 
173 | (defn await-rejoin
174 |   "Waits, via await-log, for the logfile to report 'Node rejoin completed', logging before and after. `node` is only used in the log messages."
175 |   [node]
176 |   (info "Waiting for" node "to rejoin")
177 |   (await-log "Node rejoin completed")
178 |   (info node "rejoined"))
179 | 
180 | (defn start-daemon!
181 |   "Starts the VoltDB daemon as the voltdb user, passing it the node count and the comma-joined IPs of every node in the test."
182 |   [test]
183 |   (c/sudo username
184 |           (c/cd base-dir
185 |                 (info "Starting voltdb")
186 |                 (let [nodes       (:nodes test)
187 |                       host-list   (str/join "," (map cn/ip nodes))
188 |                       daemon-opts {:chdir   base-dir
189 |                                    :pidfile (str base-dir "/pidfile")
190 |                                    :logfile (str base-dir "/log/stdout.log")}]
191 |                   (cu/start-daemon! daemon-opts
192 |                                     (str base-dir "/bin/voltdb")
193 |                                     :start
194 |                                     :--count (count nodes) :--host host-list)))))
195 | 
196 | (defn recover!
197 |   "Starts the test's DB on every node in the test, via db/start!."
198 |   [test]
199 |   (let [database (:db test)] (c/on-nodes test (fn [t node] (db/start! database t node)))))
200 | 
201 | (defn rejoin!
202 |   "Rejoins a voltdb node: starts the DB on it, then waits with await-rejoin. Calls are serialized (the lock
203 |   is this fn itself) to work around a bug in voltdb where multiple rejoins can take down cluster nodes."
204 |   [test node]
205 |   ; This bug has been fixed, so we probably don't need to lock here - KRK 2023
206 |   (locking rejoin!
207 |     (info "rejoining" node)
208 |     (db/start! (:db test) test node)
209 |     (await-rejoin node)))
210 | 
211 | (defn stop-recover!
212 |   "Kills the DB on every node, then starts it again everywhere via recover!.
213 |   Useful when Volt's lost majority and nodes kill themselves."
214 |   [test]
215 |   (c/on-nodes test #(db/kill! (:db test) %1 %2))
216 |   (recover! test))
217 | 
218 | (defn sql-cmd!
219 |   "Runs the given SQL on the local node by invoking bin/sqlcmd --query=... from base-dir as the voltdb user."
220 |   [query]
221 |   (let [query-arg (str "--query=" query)]
222 |     (c/sudo username
223 |             (c/cd base-dir (c/exec "bin/sqlcmd" query-arg)))))
224 | 
225 | (defn snarf-procedure-deps!
226 |   "Copies the node's voltdb-*.jar into the local procedures/ directory, unless a
227 |   file of that name already exists there, so we can compile stored procedures."
228 |   []
229 |   (let [remote-dir (str base-dir "/voltdb/")
230 |         jar        (first (c/cd remote-dir (cu/ls (c/lit "voltdb-*.jar"))))
231 |         local      (io/file (str "procedures/" jar))]
232 |     (when-not (.exists local)
233 |       (let [local-path (.getCanonicalPath local)]
234 |         (info "Downloading" jar "to" local-path)
235 |         (c/download (str remote-dir jar) local-path)))))
236 | 
237 | (defn build-stored-procedures!
238 |   "Compiles the Java sources in procedures/ into procedures/obj, then packages
239 |   them as jepsen-procedures.jar. Throws a RuntimeException carrying the
240 |   command's stdout and stderr if javac or jar exits nonzero."
241 |   []
242 |   (let [dir    "procedures/"
243 |         check! (fn [result]
244 |                  (when-not (zero? (:exit result))
245 |                    (throw (RuntimeException. (str "STDOUT:\n" (:out result)
246 |                                                   "\n\nSTDERR:\n" (:err result))))))]
247 |     (sh "mkdir" "obj" :dir dir) ; exit status is not checked
248 |     ; Volt currently plans on JDK8, and we're concerned that running on 17
249 |     ; might be the cause of a bug. Just in case, we'll target compilation back
250 |     ; to 11 (the oldest version you can install on Debian Bookworm easily)
251 |     (check! (sh "bash" "-c" "javac -source 11 -target 11 -classpath \"./:./*\" -d ./obj *.java"
252 |                 :dir dir))
253 |     (check! (sh "jar" "cvf" "jepsen-procedures.jar" "-C" "obj" "." :dir dir))))
254 | 
255 | (defn upload-stored-procedures!
256 |   "Uploads the locally built procedures/jepsen-procedures.jar into base-dir on the current node."
257 |   [node]
258 |   (let [local-jar (.getCanonicalPath (io/file "procedures/jepsen-procedures.jar"))]
259 |     (c/upload local-jar (str base-dir "/jepsen-procedures.jar")))
260 |   (info node "stored procedures uploaded"))
261 | 
262 | (defn load-stored-procedures!
263 |   "Loads the stored procedure classes into voltdb by running `load classes jepsen-procedures.jar` through sql-cmd! (which runs in base-dir)."
264 |   [node]
265 |   (sql-cmd! "load classes jepsen-procedures.jar")
266 |   (info node "stored procedures loaded"))
267 | 
268 | (defn db
269 |   "VoltDB around the given package tarball URL. Implements db/DB, db/LogFiles, db/Kill and db/Pause; force-download? is passed to install! as its force? flag."
270 |   [url force-download?]
271 |   (reify db/DB
272 |     (setup! [this test node]
273 |       ; Download and unpack
274 |       (install! node url force-download?)
275 | 
276 |       ; Prepare stored procedures in parallel (only the primary does any work in this future)
277 |       (let [procedures (future (when (= node (jepsen/primary test))
278 |                                  (snarf-procedure-deps!)
279 |                                  (build-stored-procedures!)
280 |                                  (upload-stored-procedures! node)))]
281 |         ; Boot: write config and init a fresh DB, start the daemon, wait until it answers
282 |         (configure! test node)
283 |         (db/start! this test node)
284 |         (await-start node)
285 | 
286 |         ; Wait for convergence (240 is presumably synchronize's timeout in seconds -- confirm)
287 |         (jepsen/synchronize test 240)
288 | 
289 |         ; Finish procedures: block on the future, then load the classes from the primary
290 |         @procedures
291 |         (when (= node (jepsen/primary test))
292 |           (load-stored-procedures! node))))
293 | 
294 |     (teardown! [this test node]
295 |       (db/kill! this test node)
296 |       (c/su
297 |        (c/exec :rm :-rf (c/lit (str base-dir "/*")))) ; c/lit keeps the glob unquoted
298 |       (vc/kill-reconnect-threads!))
299 | 
300 |     db/LogFiles
301 |     (log-files [db test node]
302 |       [(str base-dir "/log/stdout.log")
303 |        (str base-dir "/log/volt.log")
304 |        (str base-dir "/deployment.xml")])
305 | 
306 |     db/Kill
307 |     (kill! [this test node]
308 |       (c/su
309 |         (cu/stop-daemon! (str base-dir "/pidfile")))) ; same pidfile start-daemon! writes
310 | 
311 |     (start! [this test node]
312 |       (start-daemon! test))
313 | 
314 |     db/Pause
315 |     (pause! [this test node]
316 |       ; TODO: target volt specifically (the pattern is just "java")
317 |       (c/su (cu/grepkill! :stop "java")))
318 | 
319 |     (resume! [this test node]
320 |       ; TODO: target volt specifically (the pattern is just "java")
321 |       (c/su (cu/grepkill! :cont "java")))))
322 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/client.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.client
  2 |   "A wrapper around the VoltDB client library. Includes support functions for
  3 |   opening and closing clients, converting datatypes, handling errors, etc."
  4 |   (:require [clojure [pprint :refer [pprint]]
  5 |                      [string :as str]]
  6 |             [clojure.tools.logging :refer [info warn]]
  7 |             [jepsen.util :as util]
  8 |             [slingshot.slingshot :refer [try+ throw+]])
  9 |   (:import (org.voltdb VoltTable
 10 |                        VoltType
 11 |                        VoltTableRow)
 12 |            (org.voltdb.client Client
 13 |                               ClientConfig
 14 |                               ClientFactory
 15 |                               ClientResponse
 16 |                               ProcedureCallback)))
 17 | 
 18 | (defn close!
 19 |   "Closes the given VoltDB client by calling its .close method."
 20 |   [^Client c]
 21 |   (.close c))
 22 | 
 23 | (defn up?
 24 |   "Is the given node ready to accept connections? Opens a short-timeout client connection and asks for its instance id. Returns node, or nil if the connection is refused (ConnectException). The client is always closed."
 25 |   [node]
 26 |   (let [config (doto (ClientConfig. "" "")
 27 |                  (.setProcedureCallTimeout 100)
 28 |                  (.setConnectionResponseTimeout 100))
 29 |         conn   (ClientFactory/createClient config)]
 30 |     (try
 31 |       (.createConnection conn (name node))
 32 |       (.getInstanceId conn)
 33 |       node
 34 |       (catch java.net.ConnectException _
 35 |         nil)
 36 |       (finally (close! conn)))))
 37 | 
 38 | (defn up-nodes
 39 |   "Which of the test's nodes answer up? right now? Checked in parallel; returns a lazy seq of nodes."
 40 |   [test]
 41 |   (->> test :nodes (pmap up?) (remove nil?)))
 42 | 
 43 | (defn kill-reconnect-threads!
 44 |   "VoltDB client leaks reconnect threads (named \"Retry Connection\"); this kills them all. The reconnect loop swallows Exception, so we stop rather than interrupt them; where the JVM refuses Thread.stop (JDK 20+ throws UnsupportedOperationException) we log a warning and interrupt as a best effort."
 45 |   []
 46 |   (doseq [^Thread t (keys (Thread/getAllStackTraces)) :when (= "Retry Connection" (.getName t))]
 47 |     (try
 48 |       (.stop t)
 49 |       (catch UnsupportedOperationException _
 50 |         (warn "Thread.stop is unsupported on this JVM; interrupting" (.getName t))
 51 |         (.interrupt t)))))
 52 | 
 53 | (defn connect
 54 |   "Creates a VoltDB client connected to the given node and returns it. If
 55 |   connecting throws, the client is closed and the error rethrown. Options
 56 |   (:reconnect? is accepted but not read here):
 57 |       :procedure-call-timeout       (default 100)
 58 |       :connection-response-timeout  (default 1000)"
 59 |   ([node]
 60 |    (connect node {}))
 61 |   ([node opts]
 62 |    (let [defaults {:procedure-call-timeout      100
 63 |                    :connection-response-timeout 1000}
 64 |          {:keys [procedure-call-timeout connection-response-timeout]} (merge defaults opts)
 65 |          config (ClientConfig. "" "")]
 66 |      ; We don't want to try and connect to all nodes
 67 |      (.setTopologyChangeAware config false)
 68 |      (.setProcedureCallTimeout config procedure-call-timeout)
 69 |      (.setConnectionResponseTimeout config connection-response-timeout)
 70 |      (let [client (ClientFactory/createClient config)]
 71 |        (try
 72 |          (.createConnection client (name node))
 73 |          client
 74 |          (catch Throwable t
 75 |            (.close client)
 76 |            (throw t)))))))
 77 | 
 78 | (defn volt-table->map
 79 |   "Converts a VoltDB table to a data structure like
 80 | 
 81 |   {:status status-code
 82 |    :schema {:column-name-keyword VoltType, ...}   ; one struct map
 83 |    :rows [{:k1 v1, :k2 v2}, ...]}                 ; struct maps sharing one basis"
 84 |   [^VoltTable t]
 85 |   (let [column-count (.getColumnCount t)
 86 |         column-names (loop [i     0 ; keywordized column names, in column order
 87 |                             cols  (transient [])]
 88 |                        (if (= i column-count)
 89 |                          (persistent! cols)
 90 |                          (recur (inc i)
 91 |                                 (conj! cols (keyword (.getColumnName t i))))))
 92 |         basis        (apply create-struct column-names) ; struct basis shared by the schema and every row
 93 |         column-types (loop [i 0 ; column types, in column order
 94 |                             types (transient [])]
 95 |                        (if (= i column-count)
 96 |                          (persistent! types)
 97 |                          (recur (inc i)
 98 |                                 (conj! types (.getColumnType t i)))))
 99 |         row          (doto (.cloneRow t) ; iterate with a cloned row, rewound to the start
100 |                        (.resetRowPosition))]
101 |   {:status (.getStatusCode t)
102 |    :schema (apply struct basis column-types)
103 |    :rows (loop [rows (transient [])]
104 |            (if (.advanceRow row)
105 |              (let [cols (object-array column-count)] ; this row's values, fetched by index using each column's type
106 |                (loop [j 0]
107 |                  (when (< j column-count)
108 |                    (aset cols j (.get row j ^VoltType (nth column-types j)))
109 |                    (recur (inc j))))
110 |                (recur (conj! rows (clojure.lang.PersistentStructMap/construct
111 |                                     basis
112 |                                     (seq cols)))))
113 |              ; Done: advanceRow returned false, so there are no more rows
114 |              (persistent! rows)))}))
115 | 
116 | (defn call!
117 |   "Calls a stored procedure synchronously with args, asserts the response status is SUCCESS, and returns a lazy seq of its result tables converted by volt-table->map."
118 |   [^Client client procedure & args]
119 |   (let [response (.callProcedure client procedure (into-array Object args))]
120 |     ; Docs claim callProcedure will throw, but tutorial checks anyway so ???
121 |     (assert (= (.getStatus response) ClientResponse/SUCCESS))
122 |     (->> response .getResults (map volt-table->map))))
123 | 
124 | (defn async-call!
125 |   "Calls a stored procedure asynchronously and returns a promise which the callback fulfils with a seq of converted result
126 |   tables. If the last arg is a fn, it is not sent to the procedure; instead it is called with the ClientResponse first."
127 |   [^Client client procedure & args]
128 |   (let [p         (promise)
129 |         callback  (when (fn? (last args)) (last args))
130 |         proc-args (if callback (butlast args) args)]
131 |     (.callProcedure client
132 |                     (reify ProcedureCallback
133 |                       (clientCallback [this res]
134 |                         (when callback (callback res))
135 |                         (deliver p (map volt-table->map (.getResults res)))))
136 |                     procedure
137 |                     (into-array Object proc-args))
138 |     p)) ; hand back the promise itself, as documented, not .callProcedure's return value
139 | 
140 | (defn ad-hoc!
141 |   "Runs ad-hoc SQL by calling the @AdHoc system procedure with args through call!, and returns call!'s result."
142 |   [client & args]
143 |   (apply call! client "@AdHoc" args))
144 | 
145 | (defmacro with-race-retry
146 |   "If you try to perform DDL concurrently using @AdHoc, Volt tends to complain:
147 |   Invalid catalog update(@AdHoc) request: Can't do catalog update(@AdHoc) while
148 |   another one is in progress. Please retry catalog update(@AdHoc) later. This
149 |   macro retries its body on that specific error, up to 10 times, sleeping for a
150 |   randomly growing delay (each 1-2x the last, starting from 10 ms) in between."
151 |   [& body]
152 |   `(loop [tries# 10
153 |           delay# 10]
154 |      (let [r# (try+ ~@body
155 |                     ; This branch catches errors thrown by the sql-cmd! shell
156 |                     ; wrapper. Later we should add one for the actual client.
157 |                     (catch [:type :jepsen.control/nonzero-exit, :exit 255] e#
158 |                       (info "with-race-retry caught")
159 |                       (if (and (pos? tries#)
160 |                                (re-find #"Can't do catalog update.+ while another one is in progress" (:err e#)))
161 |                         ::retry
162 |                         (throw+ e#))))]
163 |        (if (= r# ::retry)
164 |          ; Delay rises by a factor of 1-2 each time
165 |          (let [delay'# (* delay# (+ 1 (rand)))]
166 |            (info "Sleeping for" delay'# "ms")
167 |            (Thread/sleep (long delay'#)) ; delay is a double; an uncoerced reflective Thread/sleep call is ambiguous on JDK 19+
168 |            (recur (dec tries#) delay'#))
169 |          ; Done
170 |          r#))))
171 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/dirty_read.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.dirty-read
  2 |   "Searches for dirty reads. We're targeting cases where VoltDB allows an
  3 |   unreplicated write to be visible to local reads *before* it's fully
  4 |   replicated. Because VoltDB currently allows stale reads, we can't use a
  5 |   normal read to verify that a transaction actually failed to commit. Instead
  6 |   we finally perform a *strong* read--a select + idempotent update, which
  7 |   forces the coordinator to push the read through the usual strongly-consistent
  8 |   path. Serializability still allows this read to be arbitrarily stale, so we
  9 |   sleep and hope that the write prevents the transaction from being reordered
 10 |   to the past. The optimizer probably isn't THAT clever."
 11 |   (:require [jepsen [core         :as jepsen]
 12 |                     [control      :as c :refer [|]]
 13 |                     [checker      :as checker]
 14 |                     [client       :as client]
 15 |                     [generator    :as gen]
 16 |                     [independent  :as independent]
 17 |                     [nemesis      :as nemesis]
 18 |                     [net          :as net]
 19 |                     [os           :as os]
 20 |                     [util         :as util]
 21 |                     [tests        :as tests]]
 22 |             [jepsen.generator.context :as gen.context]
 23 |             [jepsen.os.debian     :as debian]
 24 |             [jepsen.voltdb        :as voltdb]
 25 |             [jepsen.voltdb [client :as vc]]
 26 |             [knossos.model        :as model]
 27 |             [knossos.op           :as op]
 28 |             [clojure.string       :as str]
 29 |             [clojure.set          :as set]
 30 |             [clojure.core.reducers :as r]
 31 |             [clojure.tools.logging :refer [info warn]]))
 32 | 
 33 | (defn client
 34 |   "A client for the dirty-read workload. Handles :read, :write and :strong-read
 35 |   ops against the dirty_reads table, plus :rejoin, which rejoins the client's
 36 |   node when it is down. `opts` may carry :procedure-call-timeout and
 37 |   :connection-response-timeout for the VoltDB connection."
 38 |   ([opts] (client opts (promise) nil nil))
 39 |   ([opts initialized? node conn]
 40 |    (reify client/Client
 41 |      (open! [_ test node]
 42 |        (let [conn-opts (select-keys opts [:procedure-call-timeout
 43 |                                           :connection-response-timeout])]
 44 |          (client opts initialized? node (vc/connect node conn-opts))))
 45 | 
 46 |      (setup! [_ test]
 47 |        (when (deliver initialized? true) ; only the client that delivers the shared promise runs the DDL
 48 |          (c/on node
 49 |                ; Create table. Each DDL batch gets its own retry, so retrying CREATE PROCEDURE never re-runs the already-applied CREATE TABLE.
 50 |                (vc/with-race-retry
 51 |                  (voltdb/sql-cmd! "CREATE TABLE dirty_reads (
 52 |                                   id          INTEGER NOT NULL,
 53 |                                   PRIMARY KEY (id)
 54 |                                   );
 55 |                                   PARTITION TABLE dirty_reads ON COLUMN id;"))
 56 |                (vc/with-race-retry (voltdb/sql-cmd! "CREATE PROCEDURE FROM CLASS
 57 |                                   jepsen.procedures.DirtyReadStrongRead;")))
 58 |          (info node "table created")))
 59 | 
 60 |      (invoke! [this test op]
 61 |        (try
 62 |          (case (:f op)
 63 |            ; Race conditions ahoy, awful hack
 64 |            :rejoin (if (vc/up? node)
 65 |                      (assoc op :type :ok, :value :already-up)
 66 |                      (do (c/on node (voltdb/rejoin! test node))
 67 |                          (assoc op :type :ok, :value :rejoined)))
 68 | 
 69 |            :read (let [v (->> (:value op) ; select the row whose id is the op's value; :fail when it is absent
 70 |                               (vc/call! conn "DIRTY_READS.select")
 71 |                               first
 72 |                               :rows
 73 |                               (map :ID)
 74 |                               first)]
 75 |                    (assoc op :type (if v :ok :fail), :value v))
 76 | 
 77 |            :write (do (vc/call! conn "DIRTY_READS.insert" (:value op))
 78 |                       (assoc op :type :ok))
 79 | 
 80 |            :strong-read (->> (vc/call! conn "DirtyReadStrongRead") ; value becomes the sorted set of ids returned
 81 |                              first
 82 |                              :rows
 83 |                              (map :ID)
 84 |                              (into (sorted-set))
 85 |                              (assoc op :type :ok, :value)))
 86 |          (catch org.voltdb.client.NoConnectionsException e
 87 |            ; It'll take a few seconds to come back, might as well take a
 88 |            ; breather
 89 |            (Thread/sleep 1000)
 90 |            (assoc op :type :fail, :error :no-conns))
 91 |          (catch org.voltdb.client.ProcCallException e ; reported as :info, carrying the exception message
 92 |            (assoc op :type :info, :error (.getMessage e)))))
 93 | 
 94 |      (teardown! [_ test])
 95 | 
 96 |      (close! [_ test]
 97 |        (vc/close! conn)))))
 98 | 
 99 | (defn checker
100 |   "Verifies that we never read an element from a transaction which did not
101 |   commit (and hence was not visible in a final strong read).
102 | 
103 |   Also verifies that every successful write is present in the strong read set,
104 |   and asserts that there is one strong read per unit of test concurrency and
105 |   that all strong-read sets have the same size."
106 |   []
107 |   (reify checker/Checker
108 |     (check [checker test history opts]
109 |       (let [; Values of all :ok ops with the given :f
110 |             ok-values        (fn [f]
111 |                                (->> history
112 |                                     (filter op/ok?)
113 |                                     (filter #(= f (:f %)))
114 |                                     (map :value)))
115 |             writes           (into (sorted-set) (ok-values :write))
116 |             reads            (into (sorted-set) (ok-values :read))
117 |             strong-read-sets (ok-values :strong-read)
118 |             strong-reads     (reduce set/union strong-read-sets)
119 |             strong-count     (count strong-read-sets)
120 |             unseen           (set/difference strong-reads reads)
121 |             dirty            (set/difference reads strong-reads)
122 |             lost             (set/difference writes strong-reads)]
123 |         ; We expect one strong read per node
124 |         (info :strong-read-sets strong-count)
125 |         (info :concurrency (:concurrency test))
126 |         (assert (= strong-count (:concurrency test)))
127 |         ; All strong reads had darn well better be equal
128 |         (assert (apply = (map count (cons strong-reads strong-read-sets))))
129 | 
130 |         {:valid?            (every? empty? [dirty lost])
131 |          :read-count        (count reads)
132 |          :strong-read-count (count strong-reads)
133 |          :unseen-count      (count unseen)
134 |          :dirty-count       (count dirty)
135 |          :dirty             dirty
136 |          :lost-count        (count lost)
137 |          :lost              lost}))))
138 | 
139 | (defrecord RWGen
140 |   [last-write ; The most recently written value
141 |    in-flight] ; A vector of in-flight writes on each node; initially nil
142 |   gen/Generator
143 |   (update [this test context event]
144 |     this)
145 | 
146 |   (op [this test context]
147 |     (let [node-count (count (:nodes test))
148 |           ; Lazy initialization of in-flight vector once test is ready
149 |           in-flight  (or in-flight (vec (repeat node-count 0)))
150 |           ; Pick a free process
151 |           process    (gen.context/some-free-process context)]
152 |       (if (some? process)
153 |         (let [thread (gen.context/process->thread context process)
154 |               ; What number node is that?
155 |               n      (mod process node-count)]
156 |           (if (not= thread n)
157 |             ; Remaining processes try to read most recent writes
158 |             [(gen/fill-in-op {:f :read, :value (nth in-flight n)} context)
159 |              this]
160 |             ; The first node-count processes perform writes
161 |             (let [value (inc last-write)]
162 |               [(gen/fill-in-op {:f :write, :value value} context)
163 |                (RWGen. value (assoc in-flight n value))])))
164 |         [:pending this]))))
165 | 
166 | (defn rw-gen
167 |   "Builds a fresh RWGen (last write -1, no in-flight vector yet). While one process writes to a node, we want
168 |   another process to see that the in-flight write is visible, in the instant before the node crashes."
169 |   []
170 |   (->RWGen -1 nil))
171 | 
172 | (defn workload
173 |   "Builds the dirty-read workload map from CLI options. Special options:
174 | 
175 |       :procedure-call-timeout       How long in ms to wait for proc calls
176 |       :connection-response-timeout  How long in ms to wait for connections"
177 |   [opts]
178 |   (let [strong-read-per-thread (gen/each-thread {:f :strong-read})]
179 |     (assoc {:checker (checker), :generator (rw-gen)}
180 |            :client          (client opts)
181 |            :final-generator strong-read-per-thread)))
182 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/export.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.export
  2 |   "A workload for testing VoltDB's export mechanism. We perform a series of
  3 |   write operations. Each write op performs a single transactional procedure
  4 |   call which inserts a series of values into both a VoltDB table and an
  5 |   exported stream.
  6 | 
  7 |     {:f :write, :values [3 4]}
  8 | 
  9 |   At the end of the test we read the table (so we know what VoltDB thinks
 10 |   happened).
 11 | 
 12 |     {:f :read-db, :values [1 2 3 4 ...]}
 13 | 
 14 |   ... and the exported data from the stream (so we know what was exported).
 15 | 
 16 |     {:f :read-export, :values [1 2 ...]}
 17 | 
 18 |   We then compare the two to make sure that records aren't lost, and spurious
 19 |   records don't appear in the export."
 20 |   (:require [clojure
 21 |              [pprint :refer [pprint]]
 22 |              [set          :as set]
 23 |              [string       :as str]]
 24 |             [clojure.core.reducers :as r]
 25 |             [clojure.tools.logging :refer [info warn]]
 26 |             [jepsen
 27 |              [checker      :as checker]
 28 |              [client       :as client]
 29 |              [control      :as c]
 30 |              [generator    :as gen]
 31 |              [history      :as h]]
 32 |             [jepsen.voltdb        :as voltdb]
 33 |             [jepsen.voltdb [client :as vc]]))
 34 | 
 35 | ; Jepsen client for the export workload: one-time DDL setup, then :write ops via the ExportWrite procedure.
 36 | (defrecord Client [table-name     ; The name of the table we write to
 37 |                    stream-name    ; The name of the stream we write to
 38 |                    target-name    ; The name of our export target (the DDL below hard-codes export_target)
 39 |                    conn           ; Our VoltDB client connection
 40 |                    node           ; The node we're talking to
 41 |                    initialized?]  ; Promise; whichever client delivers it performs one-time initialization
 42 |   client/Client
 43 |   (open! [this test node]
 44 |     (assoc this
 45 |            :conn (vc/connect node test) ; NOTE(review): the whole test map is passed as connect's options -- confirm intended
 46 |            :node node))
 47 | 
 48 |   (setup! [_ test]
 49 |     (when (deliver initialized? true)
 50 |       (info node "Creating tables")
 51 |       (c/on node
 52 |             (vc/with-race-retry ; one retry loop per DDL batch, so a retry never replays a batch that already succeeded
 53 |               ; I'm not exactly sure what to do here--we want to test
 54 |               ; partitioned tables, I think, so we'll have an explicit
 55 |               ; partition column and send all our writes to one partition.
 56 |               ; NOTE(review): the table is partitioned on `value`, but the stream and procedure on `part` -- confirm.
 57 |               ; The `value` column will actually store written values.
 58 |               (voltdb/sql-cmd! (str
 59 |                 "CREATE TABLE " table-name " (
 60 |                 part   INTEGER NOT NULL,
 61 |                 value  BIGINT NOT NULL
 62 |                 );
 63 |                 PARTITION TABLE " table-name " ON COLUMN value;
 64 | 
 65 |                 CREATE STREAM " stream-name " PARTITION ON COLUMN part
 66 |                 EXPORT TO TARGET export_target (
 67 |                   part INTEGER NOT NULL,
 68 |                   value BIGINT NOT NULL
 69 |                 );")))
 70 |             (vc/with-race-retry (voltdb/sql-cmd!
 71 |                 (str "CREATE PROCEDURE FROM CLASS jepsen.procedures.ExportWrite;
 72 |                      PARTITION PROCEDURE ExportWrite ON TABLE " table-name " COLUMN part;")))
 73 |             (info node "tables created"))))
 74 | 
 75 |   (invoke! [_ test op]
 76 |     (try
 77 |       (case (:f op)
 78 |         ; Write to a random partition
 79 |         :write (do (vc/call! conn "ExportWrite"
 80 |                              (rand-int 1000)
 81 |                              (long-array (:value op)))
 82 |                    (assoc op :type :ok))
 83 | 
 84 |         ; TODO: implement this (currently yields the bare keyword rather than an op)
 85 |         :db-read :unimplemented
 86 | 
 87 |         ; TODO: implement this (currently yields the bare keyword rather than an op)
 88 |         :export-read :unimplemented
 89 |         )))
 90 | 
 91 |   (teardown! [_ test])
 92 | 
 93 |   (close! [_ test]
 94 |     (vc/close! conn)))
 95 | 
 96 | (defn rand-int-chunks
 97 |   "Lazily splits the integers from start (default 0) upward into consecutive
 98 |   vectors of random length 1 to 16, like [0 1] [2 3 4 5] [6] ..."
 99 |   ([] (rand-int-chunks 0))
100 |   ([start]
101 |    (lazy-seq
102 |      ; stop is exclusive, and is where the next chunk begins
103 |      (let [stop (-> (rand-int 16) inc (+ start))]
104 |        (cons (into [] (range start stop))
105 |              (rand-int-chunks stop))))))
106 | 
107 | (defn checker
108 |   "Basic safety checker. Just checks for set inclusion, not order or
109 |   duplicates."
110 |   []
111 |   ; TODO: This is just a sketch; I haven't gotten to feed this actual results
112 |   ; yet
113 |   (reify checker/Checker
114 |     (check [this test history opts]
115 |       (let [; What elements were acknowledged to the client?
116 |             client-ok (->> history
117 |                            h/oks
118 |                            (h/filter-f :write)
119 |                            (mapcat :value)
120 |                            (into (sorted-set)))
121 |             ; Which elements did we tell the client had failed?
122 |             client-failed (->> history
123 |                                h/fails
124 |                                (h/filter-f :write)
125 |                                (mapcat :value)
126 |                                (into (sorted-set)))
127 |             ; Which elements showed up in the DB reads?
128 |             read-db (->> history
129 |                          h/oks
130 |                          (h/filter-f :read-db)
131 |                          (mapcat :value)
132 |                          (into (sorted-set)))
133 |             ; Which elements showed up in the export?
134 |             read-export (->> history
135 |                              h/oks
136 |                              (h/filter-f :read-export)
137 |                              (mapcat :value)
138 |                              (into (sorted-set)))
139 |             ; Did we lose any writes confirmed to the client?
140 |             lost          (set/difference client-ok read-db)
141 |             ; How far behind the confirmed writes is the table?
142 |             db-unseen     (set/difference client-ok read-db)
143 |             ; How far behind the table is the export?
144 |             export-unseen (set/difference read-db read-export)
145 |             ; Writes present in the export but the client thought they failed
146 |             exported-but-client-failed (set/intersection read-export
147 |                                                          client-failed)
148 |             ; Writes present in export but missing from DB
149 |             exported-but-not-in-db (set/difference read-export read-db)]
150 |         {:valid? (and (empty? lost)
151 |                       (empty? exported-but-client-failed)
152 |                       (empty? exported-but-not-in-db))
153 |          :client-ok-count                  (count client-ok)
154 |          :client-failed-count              (count client-failed)
155 |          :read-db-count                    (count read-db)
156 |          :read-export-count                (count read-export)
157 |          :lost-count                       (count lost)
158 |          :db-unseen-count                  (count db-unseen)
159 |          :export-unseen-count              (count export-unseen)
160 |          :exported-but-client-failed-count (count exported-but-client-failed)
161 |          :exported-but-not-in-db-count     (count exported-but-not-in-db)
162 |          :lost                             lost
163 |          :exported-but-client-failed       exported-but-client-failed
164 |          :exported-but-not-in-db           exported-but-not-in-db}))))
165 | 
166 | (defn workload
167 |   "Builds the export workload from CLI options (not consulted at present):
168 |   writes of integer chunks, then final reads of the table and the export."
169 |   [opts]
170 |   (let [writes (for [chunk (rand-int-chunks)] {:f :write, :value chunk})
171 |         reads  [(gen/until-ok {:f :read-db})
172 |                 (gen/until-ok {:f :read-export})]]
173 |     {:client          (map->Client {:table-name   "export_table"
174 |                                     :stream-name  "export_stream"
175 |                                     :target-name  "export_target"
176 |                                     :initialized? (promise)})
177 |      :generator       writes
178 |      :final-generator (gen/each-thread reads)
179 |      :checker         (checker)}))
180 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/multi.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.multi
  2 |   "A system of multiple registers. Verifies linearizability over each system."
  3 |   (:require [jepsen [core           :as jepsen]
  4 |                     [control        :as c :refer [|]]
  5 |                     [checker        :as checker]
  6 |                     [client         :as client]
  7 |                     [generator      :as gen]
  8 |                     [independent    :as independent]
  9 |                     [nemesis        :as nemesis]
 10 |                     [os             :as os]
 11 |                     [tests          :as tests]]
 12 |             [jepsen.os.debian       :as debian]
 13 |             [jepsen.checker.timeline :as timeline]
 14 |             [jepsen.voltdb          :as voltdb]
 15 |             [jepsen.voltdb [client :as vc]]
 16 |             [knossos.model          :as model]
 17 |             [knossos.op             :as op]
 18 |             [clojure.string         :as str]
 19 |             [clojure.pprint         :refer [pprint]]
 20 |             [clojure.core.reducers  :as r]
 21 |             [clojure.tools.logging  :refer [info warn]])
 22 |   (:import (knossos.model Model)))
 23 | 
 24 | (defn read-only?
 25 |   "True when every micro-op in txn is a :read (vacuously so when empty)."
 26 |   [txn]
 27 |   (every? #(= :read (first %)) txn))
 28 | 
 29 | (defn client
 30 |   "A multi-register client: runs each :txn op through the MultiTxn stored
 31 |   procedure. Options:
 32 | 
 33 |       :keys                         List of keys to create
 34 |       :system-count                 How many systems to preallocate
 35 |       :procedure-call-timeout       How long in ms to wait for proc calls
 36 |       :connection-response-timeout  How long in ms to wait for connections"
 37 |   ([opts] (client nil (promise) opts nil))
 38 |   ([node initialized? opts conn]
 39 |    (reify client/Client
 40 |      (open! [_ test node]
 41 |        ; Only the timeout options are passed on to the connection
 42 |        (->> [:procedure-call-timeout :connection-response-timeout]
 43 |             (select-keys opts)
 44 |             (vc/connect node)
 45 |             (client node initialized? opts)))
 46 | 
 47 |      (setup! [_ test]
 48 |        ; Only the client that delivers the promise creates the schema
 49 |        (when (deliver initialized? true)
 50 |          (c/on node
 51 |                (vc/with-race-retry
 52 |                  ; Create table
 53 |                  (voltdb/sql-cmd! "CREATE TABLE multi (
 54 |                                   system      INTEGER NOT NULL,
 55 |                                   key         VARCHAR NOT NULL,
 56 |                                   value       INTEGER NOT NULL,
 57 |                                   PRIMARY KEY (system, key)
 58 |                                   );
 59 |                                   PARTITION TABLE multi ON COLUMN key;")
 60 |                  (voltdb/sql-cmd! "CREATE PROCEDURE FROM CLASS
 61 |                                   jepsen.procedures.MultiTxn;")
 62 |                  (info node "table created")
 63 |                  ; Create initial systems
 64 |                  (dotimes [i (:system-count opts)]
 65 |                    (doseq [k (:keys opts)]
 66 |                      (vc/call! conn "MULTI.insert" i (name k) 0)))))
 67 |          (info node "initial state populated")))
 68 | 
 69 |      (invoke! [this test op]
 70 |        ; Invoked:   {:type :invoke, :f :txn, :value [0 [[:read 1 nil] [:write 2 3]]]}
 71 |        ; Completed: {:type :ok, :f :txn, :value [0 [[:read 1 4] [:write 2 3]]]}
 72 |        (try
 73 |          (case (:f op)
 74 |            :txn
 75 |            (let [[system txn] (:value op)
 76 |                  ; The procedure call takes three parallel arrays
 77 |                  fs   (into-array String
 78 |                                   (map (comp name first) txn))
 79 |                  ks   (into-array String
 80 |                                   (map (comp name second) txn))
 81 |                  ; Reads carry a nil value; gotta pick an int
 82 |                  vs   (into-array Integer/TYPE
 83 |                                   (for [mop txn] (or (nth mop 2) -1)))
 84 |                  res  (vc/call! conn "MultiTxn" system fs ks vs)
 85 |                  ; Map results of reads back into read values
 86 |                  fill (fn [[f :as mop] table]
 87 |                         (case f
 88 |                           :write mop
 89 |                           :read  (assoc mop 2 (-> table :rows first :VALUE))))
 90 |                  txn' (mapv fill txn res)]
 91 |              (assoc op
 92 |                     :type  :ok
 93 |                     :value (independent/tuple system txn'))))
 94 | 
 95 |          (catch org.voltdb.client.NoConnectionsException e
 96 |            (assoc op :type :fail, :error :no-conns))
 97 |          (catch org.voltdb.client.ProcCallException e
 98 |            ; Read-only txns are recorded as :fail; anything else becomes :info
 99 |            ; (presumably because its writes may still have taken effect).
100 |            (let [outcome (if (read-only? (val (:value op))) :fail :info)
101 |                  error   (condp re-find (.getMessage e)
102 |                            #"^No response received in the allotted time"
103 |                            :timeout
104 | 
105 |                            #"^Connection to database host .+ was lost before a response"
106 |                            :conn-lost
107 | 
108 |                            #"^Transaction dropped due to change in mastership"
109 |                            :mastership-change
110 | 
111 |                            (throw e))]
112 |              (assoc op :type outcome, :error error)))))
113 | 
114 |      (teardown! [_ test])
115 | 
116 |      (close! [_ test]
117 |        (vc/close! conn)))))
118 | 
119 | (defn op
120 |   "A random micro-op tuple [f k v] on key k: a coin flip between
121 |   [:write k v], v drawn from 0-2, and [:read k nil]."
122 |   [k]
123 |   (let [write? (< (rand) 0.5)]
124 |     (if-not write? [:read k nil] [:write k (rand-int 3)])))
125 | 
126 | (defn op-with-read
127 |   "Like op, but returns a vector of micro-ops: a write comes with a read of
128 |   the same key in front of it. Helps us catch read errors faster, since
129 |   writes are always legal."
130 |   [k]
131 |   (let [mop (op k)]
132 |     ; every non-read gets a read of k prepended
133 |     (if (= :read (first mop)) [mop] [[:read k nil] mop])))
134 | 
135 | (defn txn
136 |   "A transaction is a vector of [type k v] tuples, e.g. [[:read 0 nil],
137 |   [:write 0 2]], touching a random subset of ks in random order. For grins,
138 |   we always perform a read before a write."
139 |   [ks]
140 |   (let [n (inc (rand-int (count ks)))]
141 |     (into [] (mapcat op-with-read) (take n (shuffle ks)))))
142 | 
143 | (defn txn-gen
144 |   "A generator fn: each call builds an :invoke op around a fresh (txn ks)."
145 |   [ks]
146 |   (fn [_test _ctx] (assoc {:type :invoke, :f :txn} :value (txn ks))))
147 | 
148 | (defn read-only-txn-gen
149 |   "A generator fn whose ops are :txn invocations that read every key in ks,
150 |   in order."
151 |   [ks]
152 |   (fn [_test _ctx]
153 |     (let [reads (into [] (map #(vector :read % nil)) ks)]
154 |       {:type :invoke, :f :txn, :value reads})))
155 | 
156 | (defn workload
157 |   "Takes CLI options and constructs a workload map. Special options:
158 | 
159 |       :keys                         Register names (default [:x :y])
160 |       :system-count                 Systems to preallocate (default 1000)
161 |       :procedure-call-timeout       How long in ms to wait for proc calls
162 |       :connection-response-timeout  How long in ms to wait for connections"
163 |   [opts]
164 |   (let [; Resolve :keys once so the rows the client creates, the model, and
165 |         ; the generated transactions agree even when a caller overrides it.
166 |         ks           (get opts :keys [:x :y])
167 |         system-count (get opts :system-count 1000)
168 |         n            (count (:nodes opts))]
169 |     {:client  (client (merge
170 |                         (select-keys opts [:procedure-call-timeout
171 |                                            :connection-response-timeout])
172 |                         {:keys ks, :system-count system-count}))
173 |      :checker (checker/compose
174 |                 {:linear   (independent/checker
175 |                              (checker/linearizable
176 |                                {:model (model/multi-register
177 |                                          (zipmap ks (repeat 0)))}))
178 |                  :timeline (independent/checker (timeline/html))})
179 |      :generator (independent/concurrent-generator
180 |                   (* 2 n)
181 |                   (range)
182 |                   (fn per-key [id]
183 |                     (->> (txn-gen ks)
184 |                          (gen/reserve n (read-only-txn-gen ks))
185 |                          (gen/limit 150))))}))
186 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/nemesis.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.nemesis
  2 |   "Various packages of faults we can inject against VoltDB clusters."
  3 |   (:require [clojure.pprint :refer [pprint]]
  4 |             [clojure.tools.logging :refer [info warn]]
  5 |             [jepsen [control :as c]
  6 |              [nemesis :as n]
  7 |              [generator :as gen]
  8 |              [net :as net]
  9 |              [util :as util]]
 10 |             [jepsen.control [util :as cu]]
 11 |             [jepsen.nemesis [combined :as nc]
 12 |              [time :as nt]]
 13 |             [jepsen.voltdb :as voltdb]
 14 |             [jepsen.voltdb [client :as vc]]
 15 |             [slingshot.slingshot :refer [try+ throw+]])
 16 |   (:import (org.voltdb.client ClientResponse)))
 17 | 
 18 | 
 19 | (defn rando-nemesis
 20 |   "Tries to confuse VoltDB by spewing random writes into an unrelated table.
 21 |   This happens in an independent thread; the nemesis returns from :invoke
 22 |   immediately, even though it's still writing.
 23 | 
 24 |   Expects invocations of the form {:f :rando, :value some-node}."
 25 |   [opts]
 26 |   (let [initialized? (promise)
 27 |         writes       (atom 0)]
 28 |     (reify n/Nemesis
 29 |       (setup! [this test]
 30 |         (let [node (first (:nodes test))
 31 |               conn (vc/connect node {})]
 32 |           (try ; outermost, so conn is closed even if we skip initialization
 33 |             (when (deliver initialized? true)
 34 |               (c/on node
 35 |                     (vc/with-race-retry
 36 |                       ; Create table
 37 |                       (voltdb/sql-cmd! "CREATE TABLE mentions (
 38 |                                        well     INTEGER NOT NULL,
 39 |                                        actually INTEGER NOT NULL
 40 |                                        );"))
 41 |                     (info node "mentions table created")))
 42 |             (finally
 43 |               (vc/close! conn))))
 44 |         this)
 45 | 
 46 |       (invoke! [this test op]
 47 |         (assert (= :rando (:f op)))
 48 |         (let [conn (vc/connect (:value op)
 49 |                                {:procedure-call-timeout 100
 50 |                                 :reconnect? false})]
 51 |           (future
 52 |             (util/with-thread-name "rando"
 53 |               (try
 54 |                 (let [; Run for 10 seconds
 55 |                       deadline (+ (System/nanoTime)
 56 |                                   (* 10 1e9))]
 57 |                   (loop [i 0]
 58 |                     (when (< (System/nanoTime) deadline)
 59 |                       ; Throttle the (asynchronous) inserts with a short sleep.
 60 |                       ; If we go TOO fast we'll start forcing other ops to
 61 |                       ; time out. If we go too slow we won't get a long
 62 |                       ; enough log.
 63 |                       (Thread/sleep 1)
 64 |                       (vc/async-call!
 65 |                         conn "MENTIONS.insert" i (rand-int 1000)
 66 |                         (fn [^ClientResponse res]
 67 |                           (when (or (= ClientResponse/SUCCESS
 68 |                                        (.getStatus res))
 69 |                                     ; not sure why this happens but it's ok?
 70 |                                     (= ClientResponse/UNINITIALIZED_APP_STATUS_CODE
 71 |                                        (.getStatus res)))
 72 |                             (->> res
 73 |                                  .getResults
 74 |                                  (map vc/volt-table->map)
 75 |                                  first
 76 |                                  :rows
 77 |                                  first
 78 |                                  :modified_tuples
 79 |                                  (swap! writes +)))))
 80 |                       (recur (inc i)))))
 81 |                 (catch Exception e
 82 |                   (info "Rando nemesis crashed with" (.getMessage e)))
 83 |                 (finally
 84 |                   (info "Rando nemesis finished writing")
 85 |                   (vc/close! conn))))))
 86 |         (assoc op :value {:running :in-background, :cumulative-writes @writes}))
 87 | 
 88 |       (teardown! [this test])
 89 | 
 90 |       n/Reflection
 91 |       (fs [_] #{:rando}))))
 92 | 
 93 | (defn rando-generator
 94 |   "Generates :rando ops aimed at random nodes, staggered by (:interval opts)."
 95 |   [opts]
 96 |   (let [rando (fn [test ctx]
 97 |                 (assoc {:type :info, :f :rando} :value (rand-nth (:nodes test))))]
 98 |     (gen/stagger (:interval opts) rando)))
 99 | 
100 | (defn rando-package
101 |   "The combined nemesis package for random writes; nil unless
102 |   ((:faults opts) :rando) is truthy."
103 |   [opts]
104 |   (let [perf {:name "rando", :fs [:rando], :color "#E9A0E6"}]
105 |     (when ((:faults opts) :rando)
106 |       {:nemesis   (rando-nemesis opts)
107 |        :generator (rando-generator opts)
108 |        :perf      #{perf}})))
109 | 
110 | (defn nemesis-package
111 |   "Takes nemesis options (see jepsen.nemesis.combined for details and examples)
112 |   and composes the stock packages plus our rando package into a single
113 |   package of nemesis and generators. :faults may be any collection."
114 |   [opts]
115 |   (let [opts     (update opts :faults set)
116 |         ; TODO: custom nemeses--port these over from jepsen.voltdb.
117 |         packages (concat (nc/nemesis-packages opts)
118 |                          [(rando-package opts)])]
119 |     (nc/compose-packages (remove nil? packages))))
120 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/perf.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb.perf
 2 |   "Performance benchmarks for single and multi transactions, with a mixed
 3 |   read-write fixed-concurrency workload, over varying node count. This is
 4 |   currently marooned--it was an experiment during the 2016 Jepsen tests, but
 5 |   isn't hooked up to the CLI."
 6 |   (:require [jepsen [core           :as jepsen]
 7 |                     [control        :as c :refer [|]]
 8 |                     [checker        :as checker]
 9 |                     [client         :as client]
10 |                     [generator      :as gen]
11 |                     [independent    :as independent]
12 |                     [nemesis        :as nemesis]
13 |                     [os             :as os]
14 |                     [tests          :as tests]]
15 |             [jepsen.os.debian       :as debian]
16 |             [jepsen.checker.timeline :as timeline]
17 |             [jepsen.voltdb          :as voltdb]
18 |             [jepsen.voltdb.multi    :as multi]
19 |             [jepsen.voltdb.single   :as single]
20 |             [knossos.model          :as model]
21 |             [knossos.op             :as op]
22 |             [clojure.string         :as str]
23 |             [clojure.pprint         :refer [pprint]]
24 |             [clojure.core.reducers  :as r]
25 |             [clojure.tools.logging  :refer [info warn]]))
26 | 
27 | (defn single-perf-test
28 |   "Builds a perf test of single-register ops over independent keys. Special
29 |   options, in addition to voltdb/base-test:
30 | 
31 |       :strong-reads                 Whether to perform normal or strong selects
32 |       :no-reads                     Don't bother with reads at all
33 |       :procedure-call-timeout       How long in ms to wait for proc calls
34 |       :connection-response-timeout  How long in ms to wait for connections"
35 |   [opts]
36 |   (let [client-opts (select-keys opts [:strong-reads
37 |                                        :procedure-call-timeout
38 |                                        :connection-response-timeout])
39 |         ; Per key: a 60-unit time limit over a mix with single/r listed twice
40 |         per-key     (fn [id]
41 |                       (gen/time-limit
42 |                         60 (gen/mix [single/r single/r single/w single/cas])))]
43 |     (voltdb/base-test
44 |       (merge opts
45 |              {:name        (str "voltdb perf single " (count (:nodes opts)))
46 |               :client      (single/client client-opts)
47 |               :model       (model/cas-register nil)
48 |               :checker     (checker/perf)
49 |               :concurrency 160
50 |               :generator   (voltdb/general-gen
51 |                              opts
52 |                              (independent/concurrent-generator
53 |                                20 (range) per-key))}))))
54 | 
55 | (defn multi-perf-test
56 |   "Special options, in addition to voltdb/base-test:
57 | 
58 |       :keys                         Register names (default [:x :y])
59 |       :system-count                 Systems to preallocate (default 1000)
60 |       :procedure-call-timeout       How long in ms to wait for proc calls
61 |       :connection-response-timeout  How long in ms to wait for connections"
62 |   [opts]
63 |   (let [; Resolve :keys once so the client's rows, the model, and the
64 |         ; generated transactions agree even when a caller overrides it.
65 |         ks           (get opts :keys [:x :y])
66 |         system-count (get opts :system-count 1000)]
67 |     (voltdb/base-test
68 |       (assoc opts
69 |              :name    (str "voltdb perf multi " (count (:nodes opts)))
70 |              :client  (multi/client
71 |                         (merge
72 |                           (select-keys opts [:procedure-call-timeout
73 |                                              :connection-response-timeout])
74 |                           {:keys ks, :system-count system-count}))
75 |              :model   (model/multi-register (zipmap ks (repeat 0)))
76 |              :checker (checker/perf)
77 |              :concurrency 16
78 |              :generator
79 |              (->> (independent/concurrent-generator
80 |                     2
81 |                     (range)
82 |                     (fn [id]
83 |                       (->> (multi/txn-gen ks)
84 |                            (gen/time-limit 60))))
85 |                   (voltdb/general-gen opts))))))
86 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/redundant_register.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb.redundant-register
 2 |   "Implements a single register, but stores that register in n keys at once,
 3 |   all of which should refer to the same value."
 4 |   (:require [jepsen [core         :as jepsen]
 5 |                     [control      :as c :refer [|]]
 6 |                     [checker      :as checker]
 7 |                     [client       :as client]
 8 |                     [generator    :as gen]
 9 |                     [independent  :as independent]
10 |                     [nemesis      :as nemesis]
11 |                     [tests        :as tests]]
12 |             [jepsen.os.debian     :as debian]
13 |             [jepsen.voltdb        :as voltdb]
14 |             [jepsen.voltdb [client :as vc]]
15 |             [knossos.model        :as model]
16 |             [knossos.op           :as op]
17 |             [clojure.string       :as str]
18 |             [clojure.core.reducers :as r]
19 |             [clojure.tools.logging :refer [info warn]]))
20 | 
21 | (defn client
22 |   "A client for a register identified by a key and stored in n copies (one
23 |   row each), all of which should agree."
24 |   ([n]
25 |    (client n nil (promise) nil))
26 |   ([n node initialized? conn]
27 |    (reify client/Client
28 |      (open! [_ test node]
29 |        (client n node initialized? (vc/connect node)))
30 | 
31 |      (setup! [_ test]
32 |        (when (deliver initialized? true)
33 |          (c/on node
34 |                (vc/with-race-retry
35 |                  ; Create table
36 |                  (voltdb/sql-cmd! "CREATE TABLE rregisters (
37 |                                   id            INTEGER NOT NULL,
38 |                                   copy          INTEGER NOT NULL,
39 |                                   value         INTEGER NOT NULL,
40 |                                   PRIMARY KEY   (id, copy)
41 |                                   );
42 |                                   PARTITION TABLE rregisters ON COLUMN copy;")
43 |                  (voltdb/sql-cmd! "CREATE PROCEDURE FROM CLASS jepsen.procedures.RRegisterUpsert;")))))
44 | 
45 |      (invoke! [this test op]
46 |        (let [tuple (:value op)
47 |              id    (key tuple)
48 |              value (val tuple)]
49 |          (case (:f op)
50 |            ; Read every copy of the register, in copy order
51 |            :read  (let [sql  "SELECT value FROM rregisters WHERE id = ? ORDER BY copy ASC;"
52 |                         rows (:rows (first (vc/ad-hoc! conn sql id)))]
53 |                     (assoc op
54 |                            :type  :ok
55 |                            :value (independent/tuple id (map :VALUE rows))))
56 |            ; Hand the procedure copy numbers 0..n-1 along with the value
57 |            :write (do (vc/call! conn "RRegisterUpsert" id (long-array (range n)) value)
58 |                       (assoc op :type :ok)))))
59 | 
60 |      (teardown! [_ test])
61 | 
62 |      (close! [_ test]
63 |        (vc/close! conn)))))
64 | 
65 | (defn r "Generator fn: a read op."           [_test _ctx] {:f :read,  :type :invoke, :value nil})
66 | (defn w "Generator fn: a write of 0 thru 4." [_test _ctx] {:f :write, :type :invoke, :value (rand-int 5)})
67 | 
68 | (defn atomic-checker
69 |   "Checker which collects every ok, non-empty read that does not show exactly
70 |   n identical values, one per copy. Valid iff there are none."
71 |   [n]
72 |   (letfn [(mixed? [op]
73 |             (let [vs (:value op)]
74 |               (and (op/ok? op)
75 |                    (= :read (:f op))
76 |                    (not= [] vs)
77 |                    ; wrong number of copies, or copies that disagree
78 |                    (or (not= n (count vs))
79 |                        (apply not= vs)))))]
80 |     (reify checker/Checker
81 |       (check [this test history opts]
82 |         (let [mixed-reads (into [] (r/filter mixed? history))]
83 |           {:valid? (empty? mixed-reads), :mixed-reads mixed-reads})))))
84 | 
85 | (defn workload
86 |   "Takes CLI options and returns a workload for testing redundant registers, to
87 |   be merged into a test map. Each register is stored in one copy per node."
88 |   [opts]
89 |   (let [n       (count (:nodes opts))
90 |         ; Per key: r is reserved n threads; w gets whatever remains
91 |         per-key (fn [id]
92 |                   (gen/reserve n r w))]
93 |     {:client    (client n)
94 |      :checker   (-> (atomic-checker n)
95 |                     independent/checker)
96 |      :generator (independent/concurrent-generator
97 |                   (* 2 n) (range) per-key)}))
98 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/runner.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.runner
  2 |   "Runs VoltDB tests from the command line."
  3 |   (:gen-class)
  4 |   (:require [clojure.pprint :refer [pprint]]
  5 |             [clojure.tools.logging :refer :all]
  6 |             [clojure.string :as str]
  7 |             [clojure.java.io :as io]
  8 |             [jepsen [core :as jepsen]
  9 |                     [checker :as checker]
 10 |                     [cli :as cli]
 11 |                     [generator :as gen]
 12 |                     [os :as os]
 13 |                     [tests :as tests]]
 14 |             [jepsen.checker.timeline :as timeline]
 15 |             [jepsen.os.debian :as debian]
 16 |             [jepsen.voltdb :as voltdb]
 17 |             [jepsen.voltdb [dirty-read :as dirty-read]
 18 |                            [export     :as export]
 19 |                            [multi      :as multi]
 20 |                            [nemesis    :as nemesis]
 21 |                            [single     :as single]
 22 |                            [redundant-register :as redundant-register]]))
 23 | 
 24 | (def workloads
 25 |   "A map of workload names to functions that take CLI options and return
 26 |   workload maps"
 27 |   {:dirty-read         dirty-read/workload
 28 |    :export             export/workload
 29 |    :multi              multi/workload
 30 |    :redundant-register redundant-register/workload
 31 |    :single             single/workload})
 32 | 
 33 | (def nemeses
 34 |   "The set of all nemesis faults we know about."
 35 |   ; TODO: add bitflip/truncate
 36 |   #{:partition :clock :pause :kill :rando})
 37 | 
 38 | (def special-nemeses
 39 |   "A map of special nemesis names to the collections of faults they stand for."
 40 |   {:none []
 41 |    :all  [:partition :clock]}) ; NOTE(review): omits :pause, :kill, :rando -- confirm intended
 42 | 
 43 | (def all-nemeses
 44 |   "Combinations of nemeses we run through for test-all; [] is the fault-free run"
 45 |   [[]
 46 |    [:rando :kill]
 47 |    [:rando :partition]
 48 |    [:rando :pause]
 49 |    [:rando :clock]
 50 |    [:rando :kill :partition]])
 51 | 
 52 | (defn parse-nemesis-spec
 53 |   "Parses a comma-separated string of fault names into a lazy seq of fault
 54 |   keywords, replacing each special-nemeses name with the faults it maps to."
 55 |   [spec]
 56 |   (for [fault    (map keyword (str/split spec #","))
 57 |         expanded (get special-nemeses fault [fault])]
 58 |     expanded))
 59 | 
 60 | (def opt-spec
 61 |   "Command line options for tools.cli"
 62 |   [[nil "--concurrency NUMBER" "How many workers should we run? Must be an integer, optionally followed by n (e.g. 3n) to multiply by the number of nodes."
 63 |     :default  "4n"
 64 |     :validate [(partial re-find #"^\d+n?$")
 65 |                "Must be an integer, optionally followed by n."]]
 66 | 
 67 |    [nil "--force-download" "Re-download tarballs, even if cached locally"]
 68 | 
 69 |    ["-l" "--license FILE" "Path to the VoltDB license file on the control node"
 70 |     :default "license.xml"]
 71 | 
 72 |    [nil "--nemesis FAULTS" "A comma-separated list of faults to inject."
 73 |     :parse-fn parse-nemesis-spec
 74 |     :validate [(partial every? (fn [nem]
 75 |                                  (or (nemeses nem)
 76 |                                      (special-nemeses nem))))
 77 |                (cli/one-of (concat nemeses (keys special-nemeses)))]]
 78 | 
 79 |    [nil "--nemesis-interval SECONDS" "How long between nemesis operations, on average, for each class of fault?"
 80 |     ; In my testing, Volt often takes 20 seconds or so just to start up--we
 81 |     ; don't want to go too fast here.
 82 |     :default  30
 83 |     :parse-fn read-string
 84 |     :validate [pos? "must be positive"]]
 85 | 
 86 |    [nil "--no-reads" "Disable reads, to test write safety only"]
 87 | 
 88 |    ["-p" "--procedure-call-timeout MILLISECONDS"
 89 |     "How long should we wait before timing out procedure calls?"
 90 |     :default 1000
 91 |     :parse-fn #(Long/parseLong %)
 92 |     :validate [pos? "Must be positive"]]
 93 | 
 94 |    ["-r" "--rate HZ" "Approximate number of requests per second, total"
 95 |     :default 100
 96 |     :parse-fn read-string
 97 |     :validate [#(and (number? %) (pos? %)) "must be a positive number"]]
 98 | 
 99 |    [nil "--recovery-delay SECONDS"
100 |     "How long should we wait before killing nodes and recovering?"
101 |     :default 0
102 |     :parse-fn #(Long/parseLong %)
103 |     :validate [(complement neg?) "Must not be negative"]] ; 0, the default, must be accepted
104 | 
105 |    [nil "--strong-reads" "Use stored procedure including a write for all reads"]
106 | 
107 |    [nil "--skip-os" "Don't perform OS setup"]
108 | 
109 |    ["-u" "--tarball URL" "URL for the VoltDB tarball to install. May be either HTTP, HTTPS, or a local file on this control node. For instance, --tarball https://foo.com/voltdb-ent.tar.gz, or file://voltdb-ent.tar.gz"
110 |     :validate [(partial re-find #"^(file|https?)://.*\.(tar)")
111 |                "Must be a file://, http://, or https:// URL including .tar"]]
112 | 
113 |    ["-w" "--workload NAME" "What workload should we run?"
114 |     :parse-fn keyword
115 |     :validate [workloads (cli/one-of workloads)]]])
116 | 
117 | (defn voltdb-test
118 |   "Takes parsed CLI options from -main and constructs a Jepsen test map."
119 |   [opts]
120 |   (let [workload-name (:workload opts)
121 |         ; Right now workloads construct entire test maps. We'll refactor this
122 |         ; in the next commit.
123 |         workload ((workloads workload-name) opts)
124 |         db       (voltdb/db (:tarball opts) (:force-download opts))
125 |         nemesis (nemesis/nemesis-package
126 |                   {:db        db
127 |                    :nodes     (:nodes opts) ; no test map exists yet; nodes come from opts
128 |                    :faults    (:nemesis opts)
129 |                    ; TODO: add support for targeting primaries
130 |                    :partition {:targets [:majority :majorities-ring]}
131 |                    :pause     {:targets [:one :majority :all]}
132 |                    :kill      {:targets [:one :majority :all]}
133 |                    :interval  (:nemesis-interval opts)})
134 |         gen (->> (:generator workload)
135 |                  (gen/stagger (/ (:rate opts)))
136 |                  (gen/nemesis
137 |                    [(gen/sleep 5)
138 |                     (:generator nemesis)])
139 |                  (gen/time-limit (:time-limit opts)))
140 |         ; Is there a final generator for this workload?
141 |         gen (if-let [final (:final-generator workload)]
142 |               (gen/phases gen
143 |                           ; Recovery
144 |                           (gen/log "Recovering cluster")
145 |                           (gen/nemesis (:final-generator nemesis))
146 |                           (gen/log "Waiting for recovery")
147 |                           (gen/sleep 30)
148 |                           ; Final generators
149 |                           (gen/clients final))
150 |               ; No final generator
151 |               gen)]
152 |     (merge tests/noop-test
153 |            opts
154 |            {:name (str (name workload-name)
155 |                        " " (str/join "," (map name (:nemesis opts))))
156 |             :os        (if (:skip-os opts)
157 |                          os/noop
158 |                          (voltdb/os debian/os))
159 |             :generator gen
160 |             :client    (:client workload)
161 |             :nemesis   (:nemesis nemesis)
162 |             :db        db
163 |             :checker   (checker/compose
164 |                          {:perf       (checker/perf {:nemeses (:perf nemesis)})
165 |                           :clock      (checker/clock-plot)
166 |                           :stats      (checker/stats)
167 |                           :exceptions (checker/unhandled-exceptions)
168 |                           :workload   (:checker workload)})})))
169 | 
170 | (defn all-tests
171 |   "Expands CLI options into the sequence of test maps that test-all should run."
172 |   [opts]
173 |   (let [fault-sets (if-let [chosen (:nemesis opts)]  [chosen] all-nemeses)
174 |         wl-names   (if-let [chosen (:workload opts)] [chosen] (keys workloads))]
175 |     (for [faults fault-sets, wl wl-names, _ (range (:test-count opts))]
176 |       (voltdb-test (assoc opts :workload wl, :nemesis faults)))))
177 | 
178 | (defn -main
179 |   "Main CLI entry point. Hands args to the combined single-test, test-all and
180 |   web server commands."
181 |   [& args]
182 |   (let [commands (merge (cli/single-test-cmd {:test-fn  voltdb-test
183 |                                               :opt-spec opt-spec})
184 |                         (cli/test-all-cmd {:tests-fn all-tests
185 |                                            :opt-spec opt-spec})
186 |                         (cli/serve-cmd))]
187 |     (cli/run! commands args)))
188 | 


--------------------------------------------------------------------------------
/src/jepsen/voltdb/single.clj:
--------------------------------------------------------------------------------
  1 | (ns jepsen.voltdb.single
  2 |   "Implements a table of single registers identified by id. Verifies
  3 |   linearizability over independent registers."
  4 |   (:require [jepsen [core         :as jepsen]
  5 |                     [control      :as c :refer [|]]
  6 |                     [checker      :as checker]
  7 |                     [client       :as client]
  8 |                     [generator    :as gen]
  9 |                     [independent  :as independent]
 10 |                     [nemesis      :as nemesis]
 11 |                     [os           :as os]
 12 |                     [tests        :as tests]]
 13 |             [jepsen.os.debian     :as debian]
 14 |             [jepsen.checker.timeline :as timeline]
 15 |             [jepsen.voltdb        :as voltdb]
 16 |             [jepsen.voltdb [client :as vc]]
 17 |             [knossos.model        :as model]
 18 |             [clojure.string       :as str]
 19 |             [clojure.core.reducers :as r]
 20 |             [clojure.tools.logging :refer [info warn]]))
 21 | 
 22 | (defn client
 23 |   "A single-register client. Options:
 24 | 
 25 |       :strong-reads                 Whether to perform normal or strong selects
 26 |       :procedure-call-timeout       How long in ms to wait for proc calls
 27 |       :connection-response-timeout  How long in ms to wait for connections"
 28 |   ([opts] (client nil nil (promise) opts)) ; unopened client; open! passes this promise on to every client it returns
 29 |   ([conn node initialized? opts]
 30 |    (reify client/Client
 31 |      (open! [_ test node]
 32 |        (let [conn (vc/connect
 33 |                     node (select-keys opts
 34 |                                       [:procedure-call-timeout
 35 |                                        :connection-response-timeout]))]
 36 |          (client conn node initialized? opts)))
 37 | 
 38 |      (setup! [_ test]
 39 |        (when (deliver initialized? true) ; only the first deliver on the shared promise is truthy
 40 |          (info node "creating table")
 41 |          (vc/with-race-retry
 42 |            (c/on node
 43 |                  ; Create table
 44 |                  (voltdb/sql-cmd! "CREATE TABLE registers (
 45 |                                   id          INTEGER UNIQUE NOT NULL,
 46 |                                   value       INTEGER NOT NULL,
 47 |                                   PRIMARY KEY (id)
 48 |                                   );
 49 |                                   PARTITION TABLE registers ON COLUMN id;")
 50 |                  (voltdb/sql-cmd! "CREATE PROCEDURE registers_cas
 51 |                                   PARTITION ON TABLE registers COLUMN id
 52 |                                   AS
 53 |                                   UPDATE registers SET value = ?
 54 |                                   WHERE id = ? AND value = ?;")
 55 |                  (voltdb/sql-cmd! "CREATE PROCEDURE FROM CLASS
 56 |                                   jepsen.procedures.SRegisterStrongRead;")
 57 |                  (voltdb/sql-cmd! "PARTITION PROCEDURE SRegisterStrongRead
 58 |                                   ON TABLE registers COLUMN id;")))
 59 |          (info node "table created")))
 60 | 
 61 |      (invoke! [this test op]
 62 |        ;(info "Process " (:process op) "using node" node)
 63 |        (try
 64 |          (let [id     (key (:value op))
 65 |                value  (val (:value op))]
 66 |            (case (:f op)
 67 |              :read   (let [proc (if (:strong-reads opts)
 68 |                                   "SRegisterStrongRead"
 69 |                                   "REGISTERS.select")
 70 |                            v (-> conn
 71 |                                  (vc/call! proc id)
 72 |                                  first
 73 |                                  :rows
 74 |                                  first
 75 |                                  :VALUE)]
 76 |                        (assoc op
 77 |                               :type :ok
 78 |                               :value (independent/tuple id v)))
 79 |              :write (do (vc/call! conn "REGISTERS.upsert" id value)
 80 |                         (assoc op :type :ok))
 81 |              :cas   (let [[v v'] value ; v is the expected value, v' the replacement
 82 |                           res (-> conn
 83 |                                   (vc/call! "registers_cas" v' id v)
 84 |                                   first
 85 |                                   :rows
 86 |                                   first
 87 |                                   :modified_tuples)]
 88 |                       (assert (#{0 1} res))
 89 |                       (assoc op :type (if (zero? res) :fail :ok)))))
 90 |          ;(catch org.voltdb.client.NoConnectionsException e
 91 |          ;  (Thread/sleep 1000)
 92 |          ;  (assoc op :type :fail, :error :no-conns))
 93 |          (catch org.voltdb.client.ProcCallException e
 94 |            (let [type (if (= :read (:f op)) :fail :info)] ; errored reads become :fail, other ops :info
 95 |              (condp re-find (.getMessage e)
 96 |                #"^No response received in the allotted time"
 97 |                (assoc op :type type, :error :timeout)
 98 | 
 99 |                #"^Connection to database host .+ was lost before a response"
100 |                (assoc op :type type, :error :conn-lost)
101 | 
102 |                #"^Transaction dropped due to change in mastership"
103 |                (assoc op :type type, :error :mastership-change)
104 | 
105 |                (throw e)))))) ; any other message is rethrown
106 | 
107 |      (teardown! [_ test])
108 | 
109 |      (close! [_ test]
110 |        (vc/close! conn)))))
111 | 
112 | (defn r   [_ _] {:type :invoke, :f :read, :value nil}) ; read invocation; both arguments are ignored
113 | (defn w   [_ _] {:type :invoke, :f :write, :value (rand-int 5)}) ; write of a random integer from 0 to 4
114 | (defn cas [_ _] {:type :invoke, :f :cas, :value [(rand-int 5) (rand-int 5)]}) ; [expected new], each a random integer from 0 to 4
115 | 
116 | (defn workload
117 |   "Takes CLI options and constructs a workload map. Special options:
118 | 
119 |       :strong-reads                 Whether to perform normal or strong selects
120 |       :no-reads                     Don't bother with reads at all
121 |       :procedure-call-timeout       How long in ms to wait for proc calls
122 |       :connection-response-timeout  How long in ms to wait for connections"
123 |   [opts]
124 |   (let [n (count (:nodes opts))]
125 |     {:client  (client (select-keys opts [:strong-reads
126 |                                          :procedure-call-timeout
127 |                                          :connection-response-timeout]))
128 |      :checker (checker/compose
129 |                 {:linear   (independent/checker
130 |                              (checker/linearizable
131 |                                {:model (model/cas-register nil)}))
132 |                  :timeline (independent/checker (timeline/html))})
133 |      :generator (independent/concurrent-generator
134 |                   (* 2 n)
135 |                   (range)
136 |                   (fn per-key [id]
137 |                     ; The first n threads are reserved for reads (or staggered
138 |                     ; cas, if no-reads is set); the rest mix writes and cas.
139 |                     (->> (gen/mix [w cas])
140 |                          (gen/reserve n (if (:no-reads opts)
141 |                                           (gen/stagger 2 cas)
142 |                                           r))
143 |                          (gen/limit 150))))}))
144 | 


--------------------------------------------------------------------------------
/test/jepsen/voltdb/dirty_read_test.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb.dirty-read-test
 2 |   (:require [clojure.test :refer :all]
 3 |             [jepsen.voltdb.dirty-read :refer :all]
 4 |             [jepsen.voltdb-test :refer [tarball]]
 5 |             [jepsen.core :as jepsen]))
 6 | 
 7 | (deftest a-test
 8 |   (loop [] ; keep re-running the test until a run is not :valid?
 9 |     (when (is (:valid? (:results (jepsen/run! (dirty-read-test tarball)))))
10 |       (recur))))
11 | 


--------------------------------------------------------------------------------
/test/jepsen/voltdb/multi_test.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb.multi-test
 2 |   (:require [clojure.test :refer :all]
 3 |             [jepsen.voltdb-test :refer [tarball]]
 4 |             [jepsen.voltdb.multi :refer :all]
 5 |             [jepsen.core :as jepsen]))
 6 | 
 7 | (defn run [t] ; runs test map t and asserts that its results are :valid?
 8 |   (is (:valid? (:results (jepsen/run! t)))))
 9 | 
10 | (deftest a-test
11 |   (loop [] ; keep re-running the test until a run is not :valid?
12 |     (when (run (multi-test {:tarball tarball
13 |                             :procedure-call-timeout 1000
14 |                             :time-limit 200}))
15 |       (recur)
16 |     )))
17 | 


--------------------------------------------------------------------------------
/test/jepsen/voltdb/perf_test.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb.perf-test
 2 |   (:require [clojure.test :refer :all]
 3 |             [jepsen.voltdb-test :refer [tarball]]
 4 |             [jepsen.voltdb.perf :refer :all]
 5 |             [jepsen.core :as jepsen]
 6 |             [clojure.pprint :refer [pprint]]))
 7 | 
 8 | (defn run! [t] ; NOTE(review): this name shadows clojure.core/run! (Clojure 1.7+) -- confirm intended
 9 |   (let [res (:results (jepsen/run! t))]
10 |     (or (is (:valid? res)) ; when the results are not valid, fall through and print them
11 |         (pprint res)
12 |         (println (:error res)))))
13 | 
14 | (deftest a-test
15 |   (let [nodes [:n1 :n2 :n3 :n4 :n5 :n6 :n7 :n8 :n9 :n10]] ; last entry was :10, which does not follow the :nN pattern
16 |     (doseq [i [8 1 4 2] ; cluster sizes to test, each taken from the front of nodes
17 |             t [single-perf-test multi-perf-test]]
18 |       (run! (t {:time-limit 100
19 |                 :tarball "http://voltdb.com/downloads/technologies/server/LINUX-voltdb-ent-6.4.jepsen4.tar.gz"
20 |                 :procedure-call-timeout 30000
21 |                 :nodes (take i nodes)})))))
22 | 


--------------------------------------------------------------------------------
/test/jepsen/voltdb/redundant_register_test.clj:
--------------------------------------------------------------------------------
1 | (ns jepsen.voltdb.redundant-register-test
2 |   (:require [clojure.test :refer :all]
3 |             [jepsen.voltdb.redundant-register :refer :all]
4 |             [jepsen.voltdb-test :refer [tarball]]
5 |             [jepsen.core :as jepsen]))
6 | 
7 | (deftest a-test ; a single run whose results must be :valid?
8 |   (is (:valid? (:results (jepsen/run! (rregister-test tarball))))))
9 | 


--------------------------------------------------------------------------------
/test/jepsen/voltdb/single_test.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb.single-test
 2 |   (:require [clojure.test :refer :all]
 3 |             [jepsen.voltdb-test :refer [tarball]]
 4 |             [jepsen.voltdb.single :refer :all]
 5 |             [jepsen.core :as jepsen]))
 6 | 
 7 | (defn run [t] ; runs test map t and asserts that its results are :valid?
 8 |   (is (:valid? (:results (jepsen/run! t)))))
 9 | 
10 | (deftest normal-reads-test
11 |   (loop [] ; keep re-running the test until a run is not :valid?
12 |     (when (run (single-test {:tarball tarball
13 |                              :strong-reads? false ; NOTE(review): jepsen.voltdb.single reads :strong-reads (no '?') -- confirm this key
14 |                              :procedure-call-timeout 45000
15 |                              :time-limit 300}))
16 |       (recur))))
17 | 
18 | (deftest strong-reads-test
19 |   (loop [] ; keep re-running the test until a run is not :valid?
20 |     (when (run (single-test {:tarball tarball
21 |                              :strong-reads? true ; NOTE(review): jepsen.voltdb.single reads :strong-reads (no '?') -- confirm this key
22 |                              :procedure-call-timeout 1000
23 |                              :time-limit 50}))
24 |       (recur))))
25 | 
26 | (deftest no-reads-test
27 |   (loop [] ; keep re-running the test until a run is not :valid?
28 |     (when (run (single-test {:tarball tarball
29 |                              :no-reads? true ; NOTE(review): jepsen.voltdb.single reads :no-reads (no '?') -- confirm this key
30 |                              :procedure-call-timeout 1000
31 |                              :time-limit 50}))
32 |       (recur))))
33 | 


--------------------------------------------------------------------------------
/test/jepsen/voltdb_test.clj:
--------------------------------------------------------------------------------
 1 | (ns jepsen.voltdb-test
 2 |   (:require [clojure.test :refer :all]
 3 |             [clojure.string :as str]
 4 |             [jepsen.voltdb :refer :all]
 5 |             [jepsen.core :as jepsen]))
 6 | 
 7 | ; A missing or unreadable tarball.url is treated as blank, so the hint below is printed.
 8 | (def tarball (try (str/trim (slurp "tarball.url")) (catch java.io.IOException _ "")))
 9 | (when (str/blank? tarball)
10 |   (println "Please put the URL to a VoltDB enterprise tarball into a file named `tarball.url`.")
11 |   (System/exit 255))
12 | 


--------------------------------------------------------------------------------
/writeup/VoltDB1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jepsen-io/voltdb/5cdec01103d8dcfd700e1fcc473ab1ff2b14183e/writeup/VoltDB1.pdf


--------------------------------------------------------------------------------
/writeup/bugs:
--------------------------------------------------------------------------------
 1 | https://issues.voltdb.com/browse/ENG-10453?filter=12302
 2 | 
 3 | 
 4 | https://issues.voltdb.com/browse/ENG-10421
 5 | https://issues.voltdb.com/browse/ENG-10389 dirty read
 6 | 
 7 | If a majority of the cluster crashes, rejoining a crashed node to a remaining
 8 | node will fail because the remaining node is going to kill itself, having
 9 | determined it's in the minority.
10 | 
11 | https://issues.voltdb.com/browse/ENG-10453 lost writes
12 | 


--------------------------------------------------------------------------------
/writeup/hstore-endofera.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jepsen-io/voltdb/5cdec01103d8dcfd700e1fcc473ab1ff2b14183e/writeup/hstore-endofera.pdf


--------------------------------------------------------------------------------
/writeup/thanks:
--------------------------------------------------------------------------------
1 | John Hugg
2 | Ruth Morgenstein
3 | Peter Alvaro
4 | Peter Bailis
5 | 


--------------------------------------------------------------------------------